In [31]:
from pathlib import Path
from collections import Counter
import os
import re
import random
import subprocess

import anndata
import scanpy as sc
import pandas as pd
import numpy as np

import mgitools.os_helpers as os_helpers

In [32]:
# !pip install git+https://github.com/estorrs/mgitools
# !pip install tensorflow==2.1.0

In [33]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [34]:
%autoreload 2

In [35]:
# !pip install -e /home/estorrs/pollock/
import pollock
from pollock.models.model import PollockDataset, PollockModel, load_from_directory, predict_from_anndata

In [36]:
# !conda install -y scanpy

In [37]:
# !pip install git+https://github.com/estorrs/mgitools

In [93]:
CELL_TYPE_KEY = 'cell_type'
N_PER_CELL_TYPE = 200
DATA_DIR = '/home/estorrs/pollock/benchmarking/data/10232020_harmonized/teir_1/'
RESULTS_DIR = '/home/estorrs/pollock/benchmarking/results/10272020_teir1'
RESULTS_CROSS_DISEASE_DIR = '/home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_disease'
RESULTS_CROSS_DTYPE_DIR = '/home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype'
SANDBOX_DIR = '/home/estorrs/pollock/benchmarking/sandbox'

Path(RESULTS_DIR).mkdir(parents=True, exist_ok=True)
Path(RESULTS_CROSS_DISEASE_DIR).mkdir(parents=True, exist_ok=True)
Path(RESULTS_CROSS_DTYPE_DIR).mkdir(parents=True, exist_ok=True)

##### create training and validation datasets

only run if you haven't created these datasets yet

In [23]:
def cap_list(ls, n=100, split=.8, oversample=True):
    """
    Grabs items from a pool.
    
    if split * pool size is greater than n, then just randomly sample 80% of the pool
    otherwise sample 80% of the pool, then oversample so you end up with a final size of n
    """
    # just return list if it is of length 1
    if len(ls) <= 1: return ls
    cap = int(len(ls) * split)
    if cap > n:
        return random.sample(ls, n)

    if oversample:
        pool = random.sample(ls, cap)
        ## oversample to
        return random.choices(pool, k=n)

    return random.sample(ls, cap)

def balancedish_training_generator(adata, cell_type_key, n_per_cell_type, oversample=True, split=.8):
    """
    Return balanced train and validation sets
    """
    cell_type_to_idxs = {}
    for cell_id, cell_type in zip(adata.obs.index, adata.obs[cell_type_key]):
        if cell_type not in cell_type_to_idxs:
            cell_type_to_idxs[cell_type] = [cell_id]
        else:
            cell_type_to_idxs[cell_type].append(cell_id)

    cell_type_to_idxs = {k:cap_list(ls, n_per_cell_type, oversample=oversample, split=split)
                         for k, ls in cell_type_to_idxs.items()}

    train_ids = np.asarray([x for ls in cell_type_to_idxs.values() for x in ls])
    train_idxs = np.arange(adata.shape[0])[np.isin(np.asarray(adata.obs.index), train_ids)]
    val_idxs = np.delete(np.arange(adata.shape[0]), train_idxs)

    train_adata = adata[train_idxs, :]
    val_adata = adata[val_idxs, :]

    return train_adata, val_adata

# def create_train_val_datasets(adata, cell_type_key, oversample=True):
#     counts = Counter(adata.obs[cell_type_key])
#     min_count = counts.most_common()[-1][1]
#     n_per_cell_type = max(min_count, )
#     train_adata, val_adata = balancedish_training_generator(adata, cell_type_key,
#                                                             n_per_cell_type, oversample=oversample)
#     return train_adata, val_adata

In [24]:
fps = sorted(os_helpers.listfiles(DATA_DIR, regex='.h5ad$'))
fp_map = {fp.split('/')[-2]:{} for fp in fps}
for fp in fps:
    if '/_train.h5ad' not in fp and '/_val.h5ad' not in fp:
        dtype = fp.split('/')[-2]
        disease = fp.split('/')[-1].replace('.h5ad', '')
        fp_map[dtype][disease] = fp
fp_map

{'scRNAseq': {'brca': '/home/estorrs/pollock/benchmarking/data/10232020_harmonized/teir_2/scRNAseq/brca.h5ad',
  'cesc': '/home/estorrs/pollock/benchmarking/data/10232020_harmonized/teir_2/scRNAseq/cesc.h5ad',
  'hnscc': '/home/estorrs/pollock/benchmarking/data/10232020_harmonized/teir_2/scRNAseq/hnscc.h5ad',
  'melanoma': '/home/estorrs/pollock/benchmarking/data/10232020_harmonized/teir_2/scRNAseq/melanoma.h5ad',
  'pbmc': '/home/estorrs/pollock/benchmarking/data/10232020_harmonized/teir_2/scRNAseq/pbmc.h5ad',
  'pdac': '/home/estorrs/pollock/benchmarking/data/10232020_harmonized/teir_2/scRNAseq/pdac.h5ad'},
 'snATACseq': {'brca': '/home/estorrs/pollock/benchmarking/data/10232020_harmonized/teir_2/snATACseq/brca.h5ad',
  'ccrcc': '/home/estorrs/pollock/benchmarking/data/10232020_harmonized/teir_2/snATACseq/ccrcc.h5ad',
  'gbm': '/home/estorrs/pollock/benchmarking/data/10232020_harmonized/teir_2/snATACseq/gbm.h5ad'},
 'snRNAseq': {'brca': '/home/estorrs/pollock/benchmarking/data/102320

In [25]:
for dtype, d in fp_map.items():
    for disease, fp in d.items():
        print(dtype, disease)
        adata = sc.read_h5ad(fp)
        # check for cell type key
        if CELL_TYPE_KEY not in adata.obs: raise RuntimeError(f'{CELL_TYPE_KEY} not in {fp}')
        
        train_adata, val_adata = balancedish_training_generator(adata, CELL_TYPE_KEY, N_PER_CELL_TYPE)
        # resample validation data to make dataset smaller while keeping rare cell types
        val_adata, _ = balancedish_training_generator(val_adata, CELL_TYPE_KEY, 1000, oversample=False,
                                                     split=1.)
        train_adata.write_h5ad(fp.replace('.h5ad', '_train.h5ad'))
        val_adata.write_h5ad(fp.replace('.h5ad', '_val.h5ad'))
        

scRNAseq brca
scRNAseq cesc
scRNAseq hnscc
scRNAseq melanoma
scRNAseq pbmc
scRNAseq pdac
snATACseq brca
snATACseq ccrcc
snATACseq gbm
snRNAseq brca
snRNAseq ccrcc
snRNAseq gbm


##### load in training and validation datasets

In [71]:
fps = sorted(os_helpers.listfiles(DATA_DIR, regex='.h5ad$'))
adata_map = {fp.split('/')[-2]:{} for fp in fps}
for fp in fps:
    dtype = fp.split('/')[-2]
    disease = re.sub(r'^(.*)((_train)|(_val)).h5ad$', r'\1', fp.split('/')[-1])
    if disease not in adata_map[dtype] and '.h5ad' not in disease: adata_map[dtype][disease] = {}
    if 'train.h5ad' in fp:
        adata_map[dtype][disease]['train'] = fp
    if 'val.h5ad' in fp:
        adata_map[dtype][disease]['val'] = fp
for dtype, d in adata_map.items():
    for disease, m in d.items():
        print(dtype, disease)

scRNAseq brca
scRNAseq cesc
scRNAseq hnscc
scRNAseq melanoma
scRNAseq pbmc
scRNAseq pdac
snATACseq brca
snATACseq ccrcc
snATACseq gbm
snRNAseq brca
snRNAseq ccrcc
snRNAseq gbm


In [72]:
adata_map['snATACseq'].pop('ccrcc')

{'train': '/home/estorrs/pollock/benchmarking/data/10232020_harmonized/teir_1/snATACseq/ccrcc_train.h5ad',
 'val': '/home/estorrs/pollock/benchmarking/data/10232020_harmonized/teir_1/snATACseq/ccrcc_val.h5ad'}

### run workflows

In [92]:
def run_workflow_for_datasets(adata_map, workflow, workflow_identifier, output_dir):
    for dtype, d in adata_map.items():
#         if dtype != 'snATACseq':
        for disease, m in d.items():
            # make dir if doesnt exist yet
            directory = os.path.join(output_dir, dtype, disease)
            Path(directory).mkdir(parents=True, exist_ok=True)
            train, val = sc.read_h5ad(m['train']), sc.read_h5ad(m['val'])

            print(dtype, disease, train.shape, val.shape)
            run_workflow(workflow, workflow_identifier,
                train, val, directory)
            
def run_workflow_for_cross_disease(adata_map, workflow, workflow_identifier, output_dir):
    for dtype, d in adata_map.items():
        for disease1, m1 in d.items():
            for disease2, m2 in d.items():
                if disease1 != disease2:
                    # make dir if doesnt exist yet
                    directory = os.path.join(output_dir, dtype, f'{disease1}_train_{disease2}_val')
                    Path(directory).mkdir(parents=True, exist_ok=True)
                    train, val = sc.read_h5ad(m1['train']), sc.read_h5ad(m2['val'])

                    print(dtype, f'{disease1}_train_{disease2}_val', train.shape, val.shape)
                    run_workflow(workflow, workflow_identifier,
                        train, val, directory)
                    
                    
def run_workflow_for_cross_datatype(adata_map, workflow, workflow_identifier, output_dir):
    for dtype1, d1 in adata_map.items():
        for dtype2, d2 in adata_map.items():
            for disease1, m1 in d1.items():
                for disease2, m2 in d2.items():
                    if dtype1 != dtype2:
                        # make dir if doesnt exist yet
                        directory = os.path.join(output_dir, dtype, f'{dtype1}_{disease1}_train_{dtype2}_{disease2}_val')
                        Path(directory).mkdir(parents=True, exist_ok=True)
                        train, val = sc.read_h5ad(m1['train']), sc.read_h5ad(m2['val'])

                        print(dtype, f'{dtype1}_{disease1}_train_{dtype2}_{disease2}_val', train.shape, val.shape)
                        run_workflow(workflow, workflow_identifier,
                            train, val, directory)

def run_workflow(workflow, workflow_identifier, train, val, output_dir):
    """
    Run the workflow defined by the workflow function.
    
    workflow function takes a train adata and a val adata as inputs,
    and returns dataframe with cell_id, groundtruth, predicted, and probability columns
    """
    # if it is pollock it needs to know where to save the module
    if workflow_identifier == 'pollock':
        df = workflow(train, val, CELL_TYPE_KEY, os.path.join(output_dir, f'{workflow_identifier}_module'))
    else:
        df = workflow(train, val, CELL_TYPE_KEY)
    df.to_csv(os.path.join(output_dir, f'{workflow_identifier}.tsv'), sep='\t', index=False, header=True)

##### pollock

In [86]:
def run_pollock_workflow(train, val, cell_type_key, module_fp):
    train.obs['is_validation'] = [False] * train.shape[0]
    val.obs['is_validation'] = [True] * val.shape[0]
    combined = train.concatenate(val)
    
    pds = PollockDataset(combined.copy(), cell_type_key=cell_type_key,
                     dataset_type='training', validation_key='is_validation')
    
    pm = PollockModel(pds.cell_types, pds.train_adata.shape[1], alpha=.0001, latent_dim=25)
    
    pm.fit(pds, epochs=20)
    
    pm.save(pds, module_fp)

    preds = predict_from_anndata(val.copy(), module_fp, adata_batch_size=10000)
    
    df = pd.DataFrame.from_dict({
        'cell_id': preds.index.to_list(),
        'groundtruth': val.obs.loc[preds.index][cell_type_key].to_list(),
        'predicted': preds['predicted_cell_type'],
        'probability': preds['cell_type_probability']
    })

    return df

In [30]:
run_workflow_for_datasets(adata_map, run_pollock_workflow, 'pollock', RESULTS_DIR)

scRNAseq brca (2600, 27131) (11253, 27131)


2020-10-29 16:02:50,078 normalizing the expression counts for model training
2020-10-29 16:03:07,475 input dataset shape: (13853, 27131)
2020-10-29 16:03:07,478 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Macrophage', 'Malignant', 'Mast', 'NK', 'Plasma', 'Treg']
2020-10-29 16:03:07,480 possible cell types: [('Endothelial', 1200), ('CD4 T cell', 1200), ('NK', 1200), ('Fibroblast', 1200), ('CD8 T cell', 1200), ('Macrophage', 1200), ('Malignant', 1200), ('Plasma', 1200), ('Treg', 1200), ('B cell', 1200), ('Mast', 746), ('Dendritic', 592), ('Erythrocyte', 515)]
2020-10-29 16:03:14,346 training dataset shape: (6102, 27131)
2020-10-29 16:03:14,348 validation dataset shape: (7751, 27131)
2020-10-29 16:03:34,674 epoch: 1, train loss: 36.64873504638672, val loss: 38.795326232910156
2020-10-29 16:03:51,631 epoch: 2, train loss: 35.816097259521484, val loss: 37.67295837402344
2020-10-29 16:04:08,309 epoch: 3, train loss: 35

scRNAseq cesc (1939, 22928) (8451, 22928)


2020-10-29 16:09:36,397 normalizing the expression counts for model training
2020-10-29 16:09:42,886 input dataset shape: (10390, 22928)
2020-10-29 16:09:42,887 possible cell types: ['CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma']
2020-10-29 16:09:42,889 possible cell types: [('Fibroblast', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('Epithelial', 1200), ('NK', 1200), ('Malignant', 1200), ('Plasma', 1135), ('CD4 T cell', 1118), ('Endothelial', 732), ('Mast', 138), ('Erythrocyte', 67)]
2020-10-29 16:09:45,684 training dataset shape: (4663, 22928)
2020-10-29 16:09:45,685 validation dataset shape: (5727, 22928)
2020-10-29 16:09:56,775 epoch: 1, train loss: 37.39118576049805, val loss: 33.17431640625
2020-10-29 16:10:06,671 epoch: 2, train loss: 36.19399642944336, val loss: 32.364967346191406
2020-10-29 16:10:16,554 epoch: 3, train loss: 35.4684944152832, val loss: 31.438339233398438
2020-10-29 16:

scRNAseq hnscc (2200, 26929) (10288, 26929)


2020-10-29 16:13:43,597 normalizing the expression counts for model training
2020-10-29 16:13:52,696 input dataset shape: (12488, 26929)
2020-10-29 16:13:52,700 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-10-29 16:13:52,702 possible cell types: [('Treg', 1200), ('Plasma', 1200), ('Mast', 1200), ('B cell', 1200), ('CD4 T cell', 1200), ('Malignant', 1200), ('CD8 T cell', 1200), ('Endothelial', 1200), ('Monocyte', 1200), ('NK', 1200), ('Erythrocyte', 488)]
2020-10-29 16:13:56,412 training dataset shape: (5268, 26929)
2020-10-29 16:13:56,413 validation dataset shape: (7220, 26929)
2020-10-29 16:14:11,542 epoch: 1, train loss: 30.803085327148438, val loss: 16.803770065307617
2020-10-29 16:14:25,990 epoch: 2, train loss: 29.932323455810547, val loss: 16.530471801757812
2020-10-29 16:14:40,380 epoch: 3, train loss: 29.50837516784668, val loss: 16.330692291259766
2020-10-29 16:14:54,867 

scRNAseq melanoma (2824, 23452) (8028, 23452)


2020-10-29 16:19:28,964 normalizing the expression counts for model training
2020-10-29 16:19:35,791 input dataset shape: (10852, 23452)
2020-10-29 16:19:35,793 possible cell types: ['B cell', 'CD4 T cell naive', 'CD8 CTL', 'CD8 CTL exausted', 'CD8 follicular T-cell', 'Dendritic', 'Fibroblast', 'M1 Macrophage', 'M2 Macrophage', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg', 'pDC']
2020-10-29 16:19:35,794 possible cell types: [('CD8 CTL', 1200), ('B cell', 1200), ('CD8 CTL exausted', 1200), ('M2 Macrophage', 1200), ('Malignant', 1200), ('CD4 T cell naive', 1200), ('Treg', 862), ('Monocyte', 539), ('NK', 538), ('pDC', 478), ('Fibroblast', 321), ('Plasma', 273), ('Dendritic', 263), ('M1 Macrophage', 200), ('CD8 follicular T-cell', 178)]
2020-10-29 16:19:38,949 training dataset shape: (5289, 23452)
2020-10-29 16:19:38,950 validation dataset shape: (5563, 23452)
2020-10-29 16:19:51,993 epoch: 1, train loss: 35.766632080078125, val loss: 44.68608093261719
2020-10-29 16:20:04,155 epoch: 2, 

scRNAseq pbmc (834, 32738) (1320, 32738)


2020-10-29 16:24:12,971 normalizing the expression counts for model training
2020-10-29 16:24:14,895 input dataset shape: (2154, 32738)
2020-10-29 16:24:14,896 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'FCGR3A Monocyte', 'Megakaryocyte', 'NK']
2020-10-29 16:24:14,897 possible cell types: [('CD4 T cell', 1143), ('B cell', 341), ('CD8 T cell', 306), ('NK', 163), ('FCGR3A Monocyte', 150), ('Dendritic', 38), ('Megakaryocyte', 13)]
2020-10-29 16:24:15,475 training dataset shape: (1220, 32738)
2020-10-29 16:24:15,476 validation dataset shape: (934, 32738)




2020-10-29 16:24:20,309 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-10-29 16:24:20,397 5 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-10-29 16:24:20,483 6 out of the last 12 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-10-29 16:24:20,614 7 out of the last 14 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-10-29 16:24:21,194 epoch: 1, train loss: 47.38005065917969, val loss: 48.97697448730469
2020-10-29 16:24:25,415 epoch: 2, train loss: 45.49711608886719, val loss: 46.78955078125
2020-10-29 16:24:29,654 epoch: 3, train loss: 45.3695068359375, val loss: 46.15694808959961
2020-10-29 16:24:33,893 epoch: 4, train loss: 46.088008880615234, val loss: 45.939239501953125
2020-10-29 16:24:38,090 epoch: 5, train loss: 45.7983283996582, val loss: 45.8

scRNAseq pdac (3703, 28756) (17428, 28756)


2020-10-29 16:25:51,707 normalizing the expression counts for model training
2020-10-29 16:26:07,917 input dataset shape: (21131, 28756)
2020-10-29 16:26:07,919 possible cell types: ['ADM', 'Acinar', 'B cell', 'CD4 T cell', 'CD4 T cell naive', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Macrophage', 'Malignant', 'Mast', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-10-29 16:26:07,921 possible cell types: [('Dendritic', 1200), ('CD4 T cell naive', 1200), ('Macrophage', 1200), ('NK', 1200), ('Plasma', 1200), ('CD8 T cell', 1200), ('Malignant', 1200), ('Treg', 1200), ('Fibroblast', 1200), ('Acinar', 1200), ('Islet', 1200), ('Endothelial', 1200), ('B cell', 1200), ('CD4 T cell', 1200), ('Mast', 1200), ('ADM', 1200), ('Epithelial', 1168), ('Erythrocyte', 612), ('Tuft', 151)]
2020-10-29 16:26:16,050 training dataset shape: (8943, 28756)
2020-10-29 16:26:16,051 validation dataset shape: (12188, 28756)
2020-10-29 16:26:40,125 epoch: 1, train loss: 29.5

snATACseq brca (2552, 19891) (9304, 19891)


2020-10-29 16:35:06,960 normalizing the expression counts for model training
2020-10-29 16:35:13,357 input dataset shape: (11856, 19891)
2020-10-29 16:35:13,358 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Macrophage', 'Malignant', 'Mast', 'NK', 'Treg', 'cCAF', 'cDC', 'mCAF', 'pDC', 'vCAF']
2020-10-29 16:35:13,360 possible cell types: [('Malignant', 1200), ('mCAF', 1200), ('Macrophage', 1200), ('Endothelial', 1200), ('CD4 T cell', 1200), ('CD8 T cell', 1200), ('Treg', 1200), ('B cell', 1200), ('vCAF', 637), ('cDC', 519), ('pDC', 457), ('NK', 430), ('cCAF', 127), ('Mast', 86)]
2020-10-29 16:35:16,451 training dataset shape: (5502, 19891)
2020-10-29 16:35:16,452 validation dataset shape: (6354, 19891)
2020-10-29 16:35:29,482 epoch: 1, train loss: 37.69630813598633, val loss: 43.788761138916016
2020-10-29 16:35:41,436 epoch: 2, train loss: 37.37327194213867, val loss: 43.38957977294922
2020-10-29 16:35:53,423 epoch: 3, train loss: 37.03084182739258, val loss

snATACseq gbm (1316, 19891) (5650, 19891)


2020-10-29 16:40:08,714 normalizing the expression counts for model training
2020-10-29 16:40:12,470 input dataset shape: (6966, 19891)
2020-10-29 16:40:12,472 possible cell types: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-10-29 16:40:12,473 possible cell types: [('Microglia', 1200), ('Neuron', 1200), ('Malignant', 1200), ('Oligodendrocytes', 1200), ('T cells', 1200), ('Fibroblast', 810), ('Endothelial', 101), ('B cell', 55)]
2020-10-29 16:40:13,940 training dataset shape: (3124, 19891)
2020-10-29 16:40:13,941 validation dataset shape: (3842, 19891)




2020-10-29 16:40:21,281 5 out of the last 54 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-10-29 16:40:21,373 6 out of the last 55 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-10-29 16:40:22,452 epoch: 1, train loss: 44.33525085449219, val loss: 23.476886749267578
2020-10-29 16:40:29,674 epoch: 2, train loss: 44.14936828613281, val loss: 23.278709411621094
2020-10-29 16:40:36,817 epoch: 3, train loss: 43.8795280456543, val loss: 23.152862548828125
2020-10-29 16:40:43,861 epoch: 4, train loss: 43.461429595947266, val loss: 23.063217163085938
2020-10-29 16:40:50,911 epoch: 5, train loss: 43.23381042480469, val los

snRNAseq brca (2455, 29175) (9490, 29175)


2020-10-29 16:43:04,834 normalizing the expression counts for model training
2020-10-29 16:43:14,110 input dataset shape: (11945, 29175)
2020-10-29 16:43:14,112 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Macrophage', 'Malignant', 'Mast', 'NK', 'Plasma', 'Treg']
2020-10-29 16:43:14,114 possible cell types: [('Malignant', 1200), ('CD4 T cell', 1200), ('Endothelial', 1200), ('Fibroblast', 1200), ('CD8 T cell', 1200), ('Macrophage', 1200), ('B cell', 1200), ('Treg', 1200), ('Plasma', 1200), ('NK', 482), ('Mast', 318), ('Adipocyte', 273), ('Dendritic', 72)]
2020-10-29 16:43:17,956 training dataset shape: (5247, 29175)
2020-10-29 16:43:17,957 validation dataset shape: (6698, 29175)
2020-10-29 16:43:33,817 epoch: 1, train loss: 39.688575744628906, val loss: 39.98768615722656
2020-10-29 16:43:48,882 epoch: 2, train loss: 39.03700637817383, val loss: 39.365821838378906
2020-10-29 16:44:03,968 epoch: 3, train loss: 38.41039

snRNAseq ccrcc (3867, 33538) (10444, 33538)


2020-10-29 16:49:05,670 normalizing the expression counts for model training
2020-10-29 16:49:18,413 input dataset shape: (14311, 33538)
2020-10-29 16:49:18,415 possible cell types: ['Basophil', 'CD4 CTL', 'CD4 T cell', 'CD4 T cell activated', 'CD4 T cell naive', 'CD4/CD8 proliferating', 'CD8 CTL', 'CD8 T cell preexhausted', 'Endothelial', 'Epithelial', 'Fibroblast', 'Macrophage', 'Macrophage proliferating', 'Malignant', 'NK cell strong', 'NK cell weak', 'Plasma', 'TRM', 'Treg', 'cDC', 'mCAF', 'pDC']
2020-10-29 16:49:18,417 possible cell types: [('Malignant', 1200), ('Endothelial', 1200), ('mCAF', 1200), ('Macrophage', 1200), ('Epithelial', 1200), ('CD4 T cell', 1200), ('NK cell strong', 871), ('CD4 T cell activated', 791), ('CD4 T cell naive', 725), ('Fibroblast', 639), ('Treg', 574), ('TRM', 513), ('CD4/CD8 proliferating', 466), ('Macrophage proliferating', 466), ('cDC', 447), ('CD8 CTL', 439), ('NK cell weak', 409), ('CD4 CTL', 348), ('Plasma', 208), ('CD8 T cell preexhausted', 141)



2020-10-29 16:49:44,584 5 out of the last 123 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-10-29 16:49:48,759 epoch: 1, train loss: 34.759559631347656, val loss: 50.450416564941406
2020-10-29 16:50:12,131 epoch: 2, train loss: 34.12269592285156, val loss: 49.272796630859375
2020-10-29 16:50:35,853 epoch: 3, train loss: 33.885658264160156, val loss: 48.648887634277344
2020-10-29 16:50:59,535 epoch: 4, train loss: 33.5566291809082, val loss: 48.25336456298828
2020-10-29 16:51:23,306 epoch: 5, train loss: 33.337772369384766, val l

snRNAseq gbm (1692, 29748) (6807, 29748)


2020-10-29 16:58:21,082 normalizing the expression counts for model training
2020-10-29 16:58:27,959 input dataset shape: (8499, 29748)
2020-10-29 16:58:27,961 possible cell types: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-10-29 16:58:27,962 possible cell types: [('Fibroblast', 1200), ('Neuron', 1200), ('Microglia', 1200), ('Oligodendrocytes', 1200), ('Malignant', 1200), ('T cells', 1200), ('Endothelial', 581), ('B cell', 578), ('Monocyte', 140)]
2020-10-29 16:58:30,434 training dataset shape: (3723, 29748)
2020-10-29 16:58:30,435 validation dataset shape: (4776, 29748)
2020-10-29 16:58:41,820 epoch: 1, train loss: 45.88108825683594, val loss: 47.097232818603516
2020-10-29 16:58:52,290 epoch: 2, train loss: 45.138607025146484, val loss: 46.600093841552734
2020-10-29 16:59:02,919 epoch: 3, train loss: 44.354984283447266, val loss: 46.20143508911133
2020-10-29 16:59:13,389 epoch: 4, train loss: 43.869003295

In [90]:
run_workflow_for_cross_disease(adata_map, run_pollock_workflow, 'pollock', RESULTS_CROSS_DISEASE_DIR)

scRNAseq brca_train_cesc_val (2600, 27131) (8449, 22928)


2020-11-03 10:05:32,144 normalizing the expression counts for model training
2020-11-03 10:05:39,440 input dataset shape: (11049, 22662)
2020-11-03 10:05:39,442 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 10:05:39,444 possible cell types: [('NK', 1200), ('CD8 T cell', 1200), ('Fibroblast', 1200), ('Malignant', 1200), ('Monocyte', 1200), ('Plasma', 1135), ('CD4 T cell', 1118), ('Epithelial', 1000), ('Endothelial', 732), ('Mast', 249), ('Erythrocyte', 215), ('Treg', 200), ('B cell', 200), ('Dendritic', 200)]
2020-11-03 10:05:42,660 training dataset shape: (5315, 22662)
2020-11-03 10:05:42,662 validation dataset shape: (5734, 22662)
2020-11-03 10:05:55,843 epoch: 1, train loss: 34.00298309326172, val loss: 22.467525482177734
2020-11-03 10:06:07,968 epoch: 2, train loss: 32.6106071472168, val loss: 21.930818557739258
2020-11-03 10:06:20,

scRNAseq brca_train_hnscc_val (2600, 27131) (10288, 26929)


2020-11-03 10:10:27,648 normalizing the expression counts for model training
2020-11-03 10:10:37,265 input dataset shape: (12888, 25823)
2020-11-03 10:10:37,267 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 10:10:37,269 possible cell types: [('NK', 1200), ('CD8 T cell', 1200), ('Endothelial', 1200), ('CD4 T cell', 1200), ('Treg', 1200), ('Malignant', 1200), ('Monocyte', 1200), ('B cell', 1200), ('Mast', 1200), ('Plasma', 1200), ('Erythrocyte', 488), ('Fibroblast', 200), ('Dendritic', 200)]
2020-11-03 10:10:41,132 training dataset shape: (5596, 25823)
2020-11-03 10:10:41,134 validation dataset shape: (7292, 25823)
2020-11-03 10:10:56,815 epoch: 1, train loss: 30.21483612060547, val loss: 24.32537078857422
2020-11-03 10:11:11,807 epoch: 2, train loss: 29.540552139282227, val loss: 23.86591339111328
2020-11-03 10:11:26,608 epoch: 3, train loss: 29.0252

scRNAseq brca_train_melanoma_val (2600, 27131) (6735, 23452)


2020-11-03 10:16:24,276 normalizing the expression counts for model training
2020-11-03 10:16:30,130 input dataset shape: (9335, 21538)
2020-11-03 10:16:30,132 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 10:16:30,133 possible cell types: [('CD8 T cell', 1200), ('CD4 T cell', 1200), ('Malignant', 1200), ('Monocyte', 1200), ('B cell', 1200), ('Treg', 862), ('Dendritic', 741), ('NK', 538), ('Fibroblast', 321), ('Plasma', 273), ('Endothelial', 200), ('Mast', 200), ('Erythrocyte', 200)]
2020-11-03 10:16:32,486 training dataset shape: (4676, 21538)
2020-11-03 10:16:32,487 validation dataset shape: (4659, 21538)
2020-11-03 10:16:43,964 epoch: 1, train loss: 36.257877349853516, val loss: 46.29212951660156
2020-11-03 10:16:54,352 epoch: 2, train loss: 35.411956787109375, val loss: 45.69017791748047
2020-11-03 10:17:04,734 epoch: 3, train loss: 34.414100646

scRNAseq brca_train_pbmc_val (2600, 27131) (1698, 32738)


2020-11-03 10:20:27,546 normalizing the expression counts for model training
2020-11-03 10:20:29,854 input dataset shape: (4298, 18511)
2020-11-03 10:20:29,856 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 10:20:29,857 possible cell types: [('CD4 T cell', 1143), ('Monocyte', 634), ('B cell', 341), ('CD8 T cell', 306), ('NK', 263), ('Dendritic', 208), ('Endothelial', 200), ('Treg', 200), ('Fibroblast', 200), ('Malignant', 200), ('Mast', 200), ('Plasma', 200), ('Erythrocyte', 200), ('Megakaryocyte', 3)]
2020-11-03 10:20:30,844 training dataset shape: (2857, 18511)
2020-11-03 10:20:30,845 validation dataset shape: (1441, 18511)




2020-11-03 10:20:37,022 5 out of the last 14 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 10:20:37,199 5 out of the last 12 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 10:20:37,303 5 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 10:20:38,505 epoch: 1, train loss: 36.03363800048828, val loss: 45.208377838134766
2020-11-03 10:20:44,841 epoch: 2, train loss: 35.55154037475586, val loss: 44.70466232299805
2020-11-03 10:20:51,518 epoch: 3, train loss: 35.25004959106445, val loss: 44.343505859375
2020-11-03 10:20:57,978 epoch: 4, train loss: 34.761714935302734, val loss: 43.75205993652344
2020-11-03 10:21:04,394 epoch: 5, train loss: 34.37222671508789, val loss: 4

scRNAseq brca_train_pdac_val (2600, 27131) (15435, 28756)


2020-11-03 10:22:50,884 normalizing the expression counts for model training
2020-11-03 10:23:04,675 input dataset shape: (18035, 26833)
2020-11-03 10:23:04,678 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 10:23:04,681 possible cell types: [('NK', 1200), ('CD8 T cell', 1200), ('Endothelial', 1200), ('CD4 T cell', 1200), ('Treg', 1200), ('Fibroblast', 1200), ('Malignant', 1200), ('Monocyte', 1200), ('B cell', 1200), ('Mast', 1200), ('Plasma', 1200), ('Dendritic', 1200), ('Acinar', 1000), ('Islet', 1000), ('Epithelial', 968), ('Erythrocyte', 612), ('Tuft', 55)]
2020-11-03 10:23:11,003 training dataset shape: (7848, 26833)
2020-11-03 10:23:11,004 validation dataset shape: (10187, 26833)
2020-11-03 10:23:32,593 epoch: 1, train loss: 30.22702980041504, val loss: 26.084396362304688
2020-11-03 10:23:53,335 epoch: 2

scRNAseq cesc_train_brca_val (1941, 22928) (11253, 27131)


2020-11-03 10:31:16,411 normalizing the expression counts for model training
2020-11-03 10:31:25,071 input dataset shape: (13194, 22662)
2020-11-03 10:31:25,073 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 10:31:25,076 possible cell types: [('Fibroblast', 1200), ('Endothelial', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('CD4 T cell', 1200), ('Plasma', 1200), ('Malignant', 1200), ('NK', 1200), ('Treg', 1000), ('B cell', 1000), ('Mast', 635), ('Dendritic', 392), ('Erythrocyte', 367), ('Epithelial', 200)]
2020-11-03 10:31:28,885 training dataset shape: (6135, 22662)
2020-11-03 10:31:28,886 validation dataset shape: (7059, 22662)
2020-11-03 10:31:43,517 epoch: 1, train loss: 30.561330795288086, val loss: 44.92002868652344
2020-11-03 10:31:57,614 epoch: 2, train loss: 29.781095504760742, val loss: 43.5384521484375
2020-11-03 10:32:1

scRNAseq cesc_train_hnscc_val (1941, 22928) (10288, 26929)


2020-11-03 10:36:53,455 normalizing the expression counts for model training
2020-11-03 10:37:01,370 input dataset shape: (12229, 22756)
2020-11-03 10:37:01,372 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 10:37:01,374 possible cell types: [('Endothelial', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('CD4 T cell', 1200), ('Plasma', 1200), ('Malignant', 1200), ('NK', 1200), ('Mast', 1089), ('Treg', 1000), ('B cell', 1000), ('Erythrocyte', 340), ('Fibroblast', 200), ('Epithelial', 200)]
2020-11-03 10:37:04,833 training dataset shape: (5539, 22756)
2020-11-03 10:37:04,834 validation dataset shape: (6690, 22756)
2020-11-03 10:37:17,877 epoch: 1, train loss: 30.53839111328125, val loss: 37.41382598876953
2020-11-03 10:37:29,689 epoch: 2, train loss: 29.597204208374023, val loss: 36.502113342285156
2020-11-03 10:37:41,769 epoch: 3, train loss: 29.2

scRNAseq cesc_train_melanoma_val (1941, 22928) (6735, 23452)


2020-11-03 10:41:49,253 normalizing the expression counts for model training
2020-11-03 10:41:54,263 input dataset shape: (8676, 20046)
2020-11-03 10:41:54,265 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 10:41:54,267 possible cell types: [('Monocyte', 1200), ('CD8 T cell', 1200), ('CD4 T cell', 1200), ('Malignant', 1200), ('B cell', 1000), ('Treg', 662), ('Dendritic', 541), ('NK', 538), ('Fibroblast', 321), ('Plasma', 273), ('Endothelial', 200), ('Epithelial', 200), ('Mast', 89), ('Erythrocyte', 52)]
2020-11-03 10:41:56,423 training dataset shape: (4410, 20046)
2020-11-03 10:41:56,424 validation dataset shape: (4266, 20046)




2020-11-03 10:42:05,522 5 out of the last 14 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 10:42:05,755 5 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 10:42:07,099 epoch: 1, train loss: 30.1329288482666, val loss: 45.5561637878418
2020-11-03 10:42:16,659 epoch: 2, train loss: 29.309389114379883, val loss: 44.859954833984375
2020-11-03 10:42:26,204 epoch: 3, train loss: 28.941333770751953, val loss: 44.25425720214844
2020-11-03 10:42:35,777 epoch: 4, train loss: 28.483665466308594, val loss: 43.5316276550293
2020-11-03 10:42:45,386 epoch: 5, train loss: 28.12422752380371, val loss: 

scRNAseq cesc_train_pbmc_val (1941, 22928) (1698, 32738)


2020-11-03 10:45:32,969 normalizing the expression counts for model training
2020-11-03 10:45:34,765 input dataset shape: (3639, 17080)
2020-11-03 10:45:34,766 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma']
2020-11-03 10:45:34,767 possible cell types: [('CD4 T cell', 1143), ('Monocyte', 634), ('CD8 T cell', 306), ('NK', 263), ('Fibroblast', 200), ('Endothelial', 200), ('Epithelial', 200), ('Plasma', 200), ('Malignant', 200), ('B cell', 141), ('Mast', 89), ('Erythrocyte', 52), ('Dendritic', 8), ('Megakaryocyte', 3)]
2020-11-03 10:45:35,531 training dataset shape: (2395, 17080)
2020-11-03 10:45:35,532 validation dataset shape: (1244, 17080)




2020-11-03 10:45:40,405 5 out of the last 47 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 10:45:40,489 5 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 10:45:40,605 6 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 10:45:40,749 5 out of the last 12 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 10:45:40,837 6 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 10:45:40,959 7 out of the last 15 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 10:45:41,077 6 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 10:45:41,187 6 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 10:45:41,300 7 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 10:45:42,294 epoch: 1, train loss: 34.411983489990234, val loss: 33.98374938964844
2020-11-03 10:45:47,178 epoch: 2, train loss: 33.73344802856445, val loss: 33.42430877685547
2020-11-03 10:45:52,255 epoch: 3, train loss: 33.35572052001953, val loss: 33.25275802612305
2020-11-03 10:45:57,187 epoch: 4, train loss: 32.82532501220703, val loss: 32.734100341796875
2020-11-03 10:46:02,075 epoch: 5, train loss: 32.31083297729492, val loss:

scRNAseq cesc_train_pdac_val (1941, 22928) (15435, 28756)


2020-11-03 10:47:26,742 normalizing the expression counts for model training
2020-11-03 10:47:38,217 input dataset shape: (17376, 22794)
2020-11-03 10:47:38,220 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 10:47:38,222 possible cell types: [('Fibroblast', 1200), ('Endothelial', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('CD4 T cell', 1200), ('Plasma', 1200), ('Malignant', 1200), ('NK', 1200), ('Epithelial', 1168), ('Mast', 1089), ('Dendritic', 1000), ('Treg', 1000), ('B cell', 1000), ('Acinar', 1000), ('Islet', 1000), ('Erythrocyte', 464), ('Tuft', 55)]
2020-11-03 10:47:43,918 training dataset shape: (7816, 22794)
2020-11-03 10:47:43,919 validation dataset shape: (9560, 22794)
2020-11-03 10:48:02,030 epoch: 1, train loss: 28.499753952026367, val loss: 43.17182159423828
2020-11-03 10:48:19,174 epoch: 2

scRNAseq hnscc_train_brca_val (2200, 26929) (11253, 27131)


2020-11-03 10:54:36,217 normalizing the expression counts for model training
2020-11-03 10:54:46,215 input dataset shape: (13453, 25823)
2020-11-03 10:54:46,217 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 10:54:46,218 possible cell types: [('B cell', 1200), ('CD4 T cell', 1200), ('Endothelial', 1200), ('Treg', 1200), ('Plasma', 1200), ('CD8 T cell', 1200), ('Monocyte', 1200), ('Malignant', 1200), ('NK', 1200), ('Fibroblast', 1000), ('Mast', 746), ('Erythrocyte', 515), ('Dendritic', 392)]
2020-11-03 10:54:50,213 training dataset shape: (6039, 25823)
2020-11-03 10:54:50,213 validation dataset shape: (7414, 25823)
2020-11-03 10:55:06,535 epoch: 1, train loss: 32.967586517333984, val loss: 24.706241607666016
2020-11-03 10:55:22,279 epoch: 2, train loss: 32.22248077392578, val loss: 24.343902587890625
2020-11-03 10:55:38,023 epoch: 3, train loss: 31.72

scRNAseq hnscc_train_cesc_val (2200, 26929) (8449, 22928)


2020-11-03 11:00:52,856 normalizing the expression counts for model training
2020-11-03 11:00:59,726 input dataset shape: (10649, 22756)
2020-11-03 11:00:59,728 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 11:00:59,730 possible cell types: [('CD8 T cell', 1200), ('Monocyte', 1200), ('Malignant', 1200), ('NK', 1200), ('Plasma', 1135), ('CD4 T cell', 1118), ('Fibroblast', 1000), ('Epithelial', 1000), ('Endothelial', 732), ('Mast', 249), ('Erythrocyte', 215), ('B cell', 200), ('Treg', 200)]
2020-11-03 11:01:02,810 training dataset shape: (5137, 22756)
2020-11-03 11:01:02,811 validation dataset shape: (5512, 22756)
2020-11-03 11:01:14,759 epoch: 1, train loss: 34.82024002075195, val loss: 13.963593482971191
2020-11-03 11:01:25,681 epoch: 2, train loss: 33.665592193603516, val loss: 13.61395263671875
2020-11-03 11:01:36,782 epoch: 3, train loss: 33.012

scRNAseq hnscc_train_melanoma_val (2200, 26929) (6735, 23452)


2020-11-03 11:05:24,346 normalizing the expression counts for model training
2020-11-03 11:05:29,968 input dataset shape: (8935, 21555)
2020-11-03 11:05:29,970 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 11:05:29,972 possible cell types: [('B cell', 1200), ('CD4 T cell', 1200), ('CD8 T cell', 1200), ('Monocyte', 1200), ('Malignant', 1200), ('Treg', 862), ('Dendritic', 541), ('NK', 538), ('Plasma', 273), ('Endothelial', 200), ('Mast', 200), ('Erythrocyte', 200), ('Fibroblast', 121)]
2020-11-03 11:05:32,174 training dataset shape: (4327, 21555)
2020-11-03 11:05:32,175 validation dataset shape: (4608, 21555)




2020-11-03 11:05:42,015 5 out of the last 18 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 11:05:43,419 epoch: 1, train loss: 31.439708709716797, val loss: 28.568798065185547
2020-11-03 11:05:53,627 epoch: 2, train loss: 30.749340057373047, val loss: 28.34649658203125
2020-11-03 11:06:03,936 epoch: 3, train loss: 30.326318740844727, val loss: 28.044658660888672
2020-11-03 11:06:14,121 epoch: 4, train loss: 29.89132308959961, val loss: 27.794782638549805
2020-11-03 11:06:24,275 epoch: 5, train loss: 29.569271087646484, val 

scRNAseq hnscc_train_pbmc_val (2200, 26929) (1698, 32738)


2020-11-03 11:09:20,090 normalizing the expression counts for model training
2020-11-03 11:09:22,212 input dataset shape: (3898, 18521)
2020-11-03 11:09:22,213 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 11:09:22,214 possible cell types: [('CD4 T cell', 1143), ('Monocyte', 634), ('B cell', 341), ('CD8 T cell', 306), ('NK', 263), ('Endothelial', 200), ('Treg', 200), ('Plasma', 200), ('Mast', 200), ('Malignant', 200), ('Erythrocyte', 200), ('Dendritic', 8), ('Megakaryocyte', 3)]
2020-11-03 11:09:23,019 training dataset shape: (2576, 18521)
2020-11-03 11:09:23,020 validation dataset shape: (1322, 18521)




2020-11-03 11:09:28,548 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 11:09:28,666 5 out of the last 12 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 11:09:28,832 6 out of the last 16 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 11:09:28,954 5 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 11:09:29,964 epoch: 1, train loss: 32.29396057128906, val loss: 23.20221710205078
2020-11-03 11:09:35,661 epoch: 2, train loss: 31.80658721923828, val loss: 22.73488998413086
2020-11-03 11:09:41,380 epoch: 3, train loss: 31.429758071899414, val loss: 22.590343475341797
2020-11-03 11:09:47,037 epoch: 4, train loss: 31.217084884643555, val loss: 22.506620407104492
2020-11-03 11:09:52,747 epoch: 5, train loss: 31.052335739135742, val lo

scRNAseq hnscc_train_pdac_val (2200, 26929) (15435, 28756)


2020-11-03 11:11:28,835 normalizing the expression counts for model training
2020-11-03 11:11:42,322 input dataset shape: (17635, 26359)
2020-11-03 11:11:42,324 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 11:11:42,327 possible cell types: [('B cell', 1200), ('CD4 T cell', 1200), ('Endothelial', 1200), ('Treg', 1200), ('Plasma', 1200), ('CD8 T cell', 1200), ('Monocyte', 1200), ('Mast', 1200), ('Malignant', 1200), ('NK', 1200), ('Dendritic', 1000), ('Fibroblast', 1000), ('Acinar', 1000), ('Islet', 1000), ('Epithelial', 968), ('Erythrocyte', 612), ('Tuft', 55)]
2020-11-03 11:11:48,354 training dataset shape: (7864, 26359)
2020-11-03 11:11:48,355 validation dataset shape: (9771, 26359)
2020-11-03 11:12:09,573 epoch: 1, train loss: 29.402570724487305, val loss: 19.21784210205078
2020-11-03 11:12:29,790 epoch: 2,

scRNAseq melanoma_train_brca_val (2000, 23452) (11253, 27131)


2020-11-03 11:19:45,474 normalizing the expression counts for model training
2020-11-03 11:19:53,700 input dataset shape: (13253, 21538)
2020-11-03 11:19:53,702 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 11:19:53,705 possible cell types: [('Malignant', 1200), ('Treg', 1200), ('Plasma', 1200), ('B cell', 1200), ('CD8 T cell', 1200), ('Fibroblast', 1200), ('CD4 T cell', 1200), ('NK', 1200), ('Monocyte', 1200), ('Endothelial', 1000), ('Dendritic', 592), ('Mast', 546), ('Erythrocyte', 315)]
2020-11-03 11:19:57,413 training dataset shape: (5833, 21538)
2020-11-03 11:19:57,414 validation dataset shape: (7420, 21538)
2020-11-03 11:20:11,173 epoch: 1, train loss: 34.6512565612793, val loss: 32.0250244140625
2020-11-03 11:20:23,836 epoch: 2, train loss: 33.83558654785156, val loss: 31.245752334594727
2020-11-03 11:20:36,504 epoch: 3, train loss: 33.359344

scRNAseq melanoma_train_cesc_val (2000, 23452) (8449, 22928)


2020-11-03 11:24:55,684 normalizing the expression counts for model training
2020-11-03 11:25:01,824 input dataset shape: (10449, 20046)
2020-11-03 11:25:01,826 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 11:25:01,828 possible cell types: [('Malignant', 1200), ('CD8 T cell', 1200), ('Fibroblast', 1200), ('NK', 1200), ('Monocyte', 1200), ('Plasma', 1135), ('CD4 T cell', 1118), ('Epithelial', 1000), ('Endothelial', 532), ('Treg', 200), ('B cell', 200), ('Dendritic', 200), ('Mast', 49), ('Erythrocyte', 15)]
2020-11-03 11:25:04,677 training dataset shape: (4805, 20046)
2020-11-03 11:25:04,678 validation dataset shape: (5644, 20046)




2020-11-03 11:25:14,882 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 11:25:16,164 epoch: 1, train loss: 32.98252487182617, val loss: 20.59423828125
2020-11-03 11:25:26,446 epoch: 2, train loss: 31.996253967285156, val loss: 20.15383529663086
2020-11-03 11:25:36,789 epoch: 3, train loss: 31.346406936645508, val loss: 19.829742431640625
2020-11-03 11:25:47,191 epoch: 4, train loss: 30.787559509277344, val loss: 19.63329315185547
2020-11-03 11:25:57,530 epoch: 5, train loss: 30.610082626342773, val loss:

scRNAseq melanoma_train_hnscc_val (2000, 23452) (10288, 26929)


2020-11-03 11:29:10,802 normalizing the expression counts for model training
2020-11-03 11:29:18,468 input dataset shape: (12288, 21555)
2020-11-03 11:29:18,470 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 11:29:18,471 possible cell types: [('Malignant', 1200), ('Treg', 1200), ('Plasma', 1200), ('B cell', 1200), ('CD8 T cell', 1200), ('CD4 T cell', 1200), ('NK', 1200), ('Monocyte', 1200), ('Mast', 1000), ('Endothelial', 1000), ('Erythrocyte', 288), ('Fibroblast', 200), ('Dendritic', 200)]
2020-11-03 11:29:21,971 training dataset shape: (5504, 21555)
2020-11-03 11:29:21,972 validation dataset shape: (6784, 21555)
2020-11-03 11:29:35,555 epoch: 1, train loss: 28.969152450561523, val loss: 20.247398376464844
2020-11-03 11:29:48,012 epoch: 2, train loss: 28.18299102783203, val loss: 19.865365982055664
2020-11-03 11:30:00,663 epoch: 3, train loss: 27.72

scRNAseq melanoma_train_pbmc_val (2000, 23452) (1698, 32738)


2020-11-03 11:34:10,616 normalizing the expression counts for model training
2020-11-03 11:34:12,430 input dataset shape: (3698, 16804)
2020-11-03 11:34:12,432 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Fibroblast', 'Malignant', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 11:34:12,433 possible cell types: [('CD4 T cell', 1143), ('Monocyte', 634), ('B cell', 341), ('CD8 T cell', 306), ('NK', 263), ('Dendritic', 208), ('Malignant', 200), ('Treg', 200), ('Plasma', 200), ('Fibroblast', 200), ('Megakaryocyte', 3)]
2020-11-03 11:34:13,113 training dataset shape: (2406, 16804)
2020-11-03 11:34:13,115 validation dataset shape: (1292, 16804)




2020-11-03 11:34:18,140 5 out of the last 14 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 11:34:19,108 epoch: 1, train loss: 36.28264236450195, val loss: 44.78963088989258
2020-11-03 11:34:23,906 epoch: 2, train loss: 35.32258224487305, val loss: 43.80751037597656
2020-11-03 11:34:28,724 epoch: 3, train loss: 35.197113037109375, val loss: 43.63060760498047
2020-11-03 11:34:33,520 epoch: 4, train loss: 34.8891716003418, val loss: 43.217071533203125
2020-11-03 11:34:38,239 epoch: 5, train loss: 34.6039924621582, val loss: 4

scRNAseq melanoma_train_pdac_val (2000, 23452) (15435, 28756)


2020-11-03 11:35:59,139 normalizing the expression counts for model training
2020-11-03 11:36:10,156 input dataset shape: (17435, 21725)
2020-11-03 11:36:10,158 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 11:36:10,161 possible cell types: [('Malignant', 1200), ('Treg', 1200), ('Plasma', 1200), ('B cell', 1200), ('CD8 T cell', 1200), ('Fibroblast', 1200), ('CD4 T cell', 1200), ('NK', 1200), ('Dendritic', 1200), ('Monocyte', 1200), ('Endothelial', 1000), ('Acinar', 1000), ('Islet', 1000), ('Mast', 1000), ('Epithelial', 968), ('Erythrocyte', 412), ('Tuft', 55)]
2020-11-03 11:36:15,697 training dataset shape: (7800, 21725)
2020-11-03 11:36:15,698 validation dataset shape: (9635, 21725)
2020-11-03 11:36:33,746 epoch: 1, train loss: 29.12691307067871, val loss: 22.703203201293945
2020-11-03 11:36:50,888 epoch: 2,

scRNAseq pbmc_train_brca_val (940, 32738) (11253, 27131)


2020-11-03 11:42:57,804 normalizing the expression counts for model training
2020-11-03 11:43:04,400 input dataset shape: (12193, 18511)
2020-11-03 11:43:04,402 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 11:43:04,404 possible cell types: [('CD4 T cell', 1200), ('B cell', 1200), ('CD8 T cell', 1200), ('Monocyte', 1200), ('NK', 1100), ('Endothelial', 1000), ('Treg', 1000), ('Malignant', 1000), ('Fibroblast', 1000), ('Plasma', 1000), ('Mast', 546), ('Dendritic', 422), ('Erythrocyte', 315), ('Megakaryocyte', 10)]
2020-11-03 11:43:07,622 training dataset shape: (5770, 18511)
2020-11-03 11:43:07,623 validation dataset shape: (6423, 18511)
2020-11-03 11:43:20,093 epoch: 1, train loss: 29.70446014404297, val loss: 15.315662384033203
2020-11-03 11:43:31,682 epoch: 2, train loss: 29.04641342163086, val loss: 15.071821212768555
2020-11-03 1

scRNAseq pbmc_train_cesc_val (940, 32738) (8449, 22928)


2020-11-03 11:47:38,882 normalizing the expression counts for model training
2020-11-03 11:47:43,540 input dataset shape: (9389, 17080)
2020-11-03 11:47:43,542 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma']
2020-11-03 11:47:43,544 possible cell types: [('CD8 T cell', 1200), ('Monocyte', 1200), ('CD4 T cell', 1118), ('NK', 1100), ('Fibroblast', 1000), ('Epithelial', 1000), ('Malignant', 1000), ('Plasma', 935), ('Endothelial', 532), ('B cell', 200), ('Mast', 49), ('Dendritic', 30), ('Erythrocyte', 15), ('Megakaryocyte', 10)]
2020-11-03 11:47:45,865 training dataset shape: (4531, 17080)
2020-11-03 11:47:45,866 validation dataset shape: (4858, 17080)




2020-11-03 11:47:53,829 5 out of the last 80 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 11:47:53,915 5 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 11:47:54,204 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 11:47:54,515 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 11:47:54,600 6 out of the last 14 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 11:47:54,687 7 out of the last 15 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 11:47:55,755 epoch: 1, train loss: 30.160335540771484, val loss: 8.557684898376465
2020-11-03 11:48:04,110 epoch: 2, train loss: 29.25855255126953, val loss: 8.365677833557129
2020-11-03 11:48:12,494 epoch: 3, train loss: 28.73624610900879, val loss: 8.235435485839844
2020-11-03 11:48:20,821 epoch: 4, train loss: 28.32094955444336, val loss: 8.191970825195312
2020-11-03 11:48:29,227 epoch: 5, train loss: 28.01756477355957, val loss: 

scRNAseq pbmc_train_hnscc_val (940, 32738) (10288, 26929)


2020-11-03 11:51:04,743 normalizing the expression counts for model training
2020-11-03 11:51:10,811 input dataset shape: (11228, 18521)
2020-11-03 11:51:10,813 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 11:51:10,815 possible cell types: [('CD4 T cell', 1200), ('B cell', 1200), ('CD8 T cell', 1200), ('Monocyte', 1200), ('NK', 1100), ('Treg', 1000), ('Plasma', 1000), ('Mast', 1000), ('Endothelial', 1000), ('Malignant', 1000), ('Erythrocyte', 288), ('Dendritic', 30), ('Megakaryocyte', 10)]
2020-11-03 11:51:13,602 training dataset shape: (5235, 18521)
2020-11-03 11:51:13,603 validation dataset shape: (5993, 18521)
2020-11-03 11:51:25,124 epoch: 1, train loss: 25.005624771118164, val loss: 9.906452178955078
2020-11-03 11:51:35,644 epoch: 2, train loss: 24.4320011138916, val loss: 9.679719924926758
2020-11-03 11:51:46,127 epoch: 3, train loss: 24.0

scRNAseq pbmc_train_melanoma_val (940, 32738) (6735, 23452)


2020-11-03 11:55:20,434 normalizing the expression counts for model training
2020-11-03 11:55:24,198 input dataset shape: (7675, 16804)
2020-11-03 11:55:24,200 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Fibroblast', 'Malignant', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 11:55:24,201 possible cell types: [('CD4 T cell', 1200), ('B cell', 1200), ('CD8 T cell', 1200), ('Monocyte', 1200), ('Malignant', 1000), ('Treg', 662), ('Dendritic', 571), ('NK', 438), ('Fibroblast', 121), ('Plasma', 73), ('Megakaryocyte', 10)]
2020-11-03 11:55:25,813 training dataset shape: (3720, 16804)
2020-11-03 11:55:25,814 validation dataset shape: (3955, 16804)
2020-11-03 11:55:33,907 epoch: 1, train loss: 29.658782958984375, val loss: 19.158100128173828
2020-11-03 11:55:40,803 epoch: 2, train loss: 29.276447296142578, val loss: 18.959152221679688
2020-11-03 11:55:47,619 epoch: 3, train loss: 28.84733772277832, val loss: 18.868236541748047
2020-11-03 11:55:54,

scRNAseq pbmc_train_pdac_val (940, 32738) (15435, 28756)


2020-11-03 11:58:07,151 normalizing the expression counts for model training
2020-11-03 11:58:16,110 input dataset shape: (16375, 18993)
2020-11-03 11:58:16,112 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 11:58:16,115 possible cell types: [('CD4 T cell', 1200), ('B cell', 1200), ('CD8 T cell', 1200), ('Monocyte', 1200), ('NK', 1100), ('Dendritic', 1030), ('Plasma', 1000), ('Endothelial', 1000), ('Treg', 1000), ('Malignant', 1000), ('Fibroblast', 1000), ('Acinar', 1000), ('Islet', 1000), ('Mast', 1000), ('Epithelial', 968), ('Erythrocyte', 412), ('Tuft', 55), ('Megakaryocyte', 10)]
2020-11-03 11:58:20,871 training dataset shape: (7806, 18993)
2020-11-03 11:58:20,872 validation dataset shape: (8569, 18993)
2020-11-03 11:58:37,705 epoch: 1, train loss: 24.897624969482422, val loss: 11.99538135

scRNAseq pdac_train_brca_val (3296, 28756) (11253, 27131)


2020-11-03 12:04:35,787 normalizing the expression counts for model training
2020-11-03 12:04:47,217 input dataset shape: (14549, 26833)
2020-11-03 12:04:47,219 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 12:04:47,221 possible cell types: [('Treg', 1200), ('Monocyte', 1200), ('NK', 1200), ('CD8 T cell', 1200), ('Plasma', 1200), ('Malignant', 1200), ('Fibroblast', 1200), ('Endothelial', 1200), ('B cell', 1200), ('CD4 T cell', 1200), ('Mast', 746), ('Dendritic', 592), ('Erythrocyte', 515), ('Epithelial', 200), ('Islet', 200), ('Acinar', 200), ('Tuft', 96)]
2020-11-03 12:04:52,251 training dataset shape: (6632, 26833)
2020-11-03 12:04:52,252 validation dataset shape: (7917, 26833)
2020-11-03 12:05:11,270 epoch: 1, train loss: 33.965389251708984, val loss: 37.786781311035156
2020-11-03 12:05:29,202 epoch: 2, tr

scRNAseq pdac_train_cesc_val (3296, 28756) (8449, 22928)


2020-11-03 12:11:44,062 normalizing the expression counts for model training
2020-11-03 12:11:51,920 input dataset shape: (11745, 22794)
2020-11-03 12:11:51,922 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 12:11:51,924 possible cell types: [('Monocyte', 1200), ('NK', 1200), ('Epithelial', 1200), ('CD8 T cell', 1200), ('Malignant', 1200), ('Fibroblast', 1200), ('Plasma', 1135), ('CD4 T cell', 1118), ('Endothelial', 732), ('Mast', 249), ('Erythrocyte', 215), ('Treg', 200), ('Dendritic', 200), ('Islet', 200), ('Acinar', 200), ('B cell', 200), ('Tuft', 96)]
2020-11-03 12:11:55,435 training dataset shape: (5684, 22794)
2020-11-03 12:11:55,436 validation dataset shape: (6061, 22794)




2020-11-03 12:12:07,574 5 out of the last 95 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 12:12:10,123 epoch: 1, train loss: 33.60778045654297, val loss: 24.923023223876953
2020-11-03 12:12:23,434 epoch: 2, train loss: 32.390785217285156, val loss: 24.20905876159668
2020-11-03 12:12:36,951 epoch: 3, train loss: 31.826358795166016, val loss: 23.848529815673828
2020-11-03 12:12:50,319 epoch: 4, train loss: 31.45301055908203, val loss: 23.639816284179688
2020-11-03 12:13:03,552 epoch: 5, train loss: 31.230775833129883, val l

scRNAseq pdac_train_hnscc_val (3296, 28756) (10288, 26929)


2020-11-03 12:17:07,948 normalizing the expression counts for model training
2020-11-03 12:17:18,165 input dataset shape: (13584, 26359)
2020-11-03 12:17:18,166 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 12:17:18,169 possible cell types: [('Treg', 1200), ('Monocyte', 1200), ('NK', 1200), ('CD8 T cell', 1200), ('Plasma', 1200), ('Malignant', 1200), ('Endothelial', 1200), ('B cell', 1200), ('Mast', 1200), ('CD4 T cell', 1200), ('Erythrocyte', 488), ('Dendritic', 200), ('Epithelial', 200), ('Fibroblast', 200), ('Islet', 200), ('Acinar', 200), ('Tuft', 96)]
2020-11-03 12:17:22,564 training dataset shape: (6115, 26359)
2020-11-03 12:17:22,565 validation dataset shape: (7469, 26359)
2020-11-03 12:17:40,297 epoch: 1, train loss: 30.29351043701172, val loss: 25.943115234375
2020-11-03 12:17:56,984 epoch: 2, train 

scRNAseq pdac_train_melanoma_val (3296, 28756) (6735, 23452)


2020-11-03 12:23:43,675 normalizing the expression counts for model training
2020-11-03 12:23:50,057 input dataset shape: (10031, 21725)
2020-11-03 12:23:50,058 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 12:23:50,060 possible cell types: [('Monocyte', 1200), ('CD8 T cell', 1200), ('Malignant', 1200), ('B cell', 1200), ('CD4 T cell', 1200), ('Treg', 862), ('Dendritic', 741), ('NK', 538), ('Fibroblast', 321), ('Plasma', 273), ('Epithelial', 200), ('Erythrocyte', 200), ('Endothelial', 200), ('Islet', 200), ('Acinar', 200), ('Mast', 200), ('Tuft', 96)]
2020-11-03 12:23:52,856 training dataset shape: (5210, 21725)
2020-11-03 12:23:52,857 validation dataset shape: (4821, 21725)
2020-11-03 12:24:06,155 epoch: 1, train loss: 33.75859451293945, val loss: 46.65168762207031
2020-11-03 12:24:18,117 epoch: 2, train los

scRNAseq pdac_train_pbmc_val (3296, 28756) (1698, 32738)


2020-11-03 12:28:25,423 normalizing the expression counts for model training
2020-11-03 12:28:28,132 input dataset shape: (4994, 18993)
2020-11-03 12:28:28,133 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 12:28:28,134 possible cell types: [('CD4 T cell', 1143), ('Monocyte', 634), ('B cell', 341), ('CD8 T cell', 306), ('NK', 263), ('Dendritic', 208), ('Treg', 200), ('Epithelial', 200), ('Plasma', 200), ('Erythrocyte', 200), ('Malignant', 200), ('Fibroblast', 200), ('Endothelial', 200), ('Islet', 200), ('Acinar', 200), ('Mast', 200), ('Tuft', 96), ('Megakaryocyte', 3)]
2020-11-03 12:28:29,313 training dataset shape: (3395, 18993)
2020-11-03 12:28:29,314 validation dataset shape: (1599, 18993)




2020-11-03 12:28:36,601 5 out of the last 20 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 12:28:36,854 5 out of the last 12 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 12:28:36,959 6 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 12:28:38,556 epoch: 1, train loss: 33.290672302246094, val loss: 42.09599304199219
2020-11-03 12:28:46,386 epoch: 2, train loss: 32.980953216552734, val loss: 41.671119689941406
2020-11-03 12:28:54,212 epoch: 3, train loss: 32.21879196166992, val loss: 40.911109924316406
2020-11-03 12:29:02,134 epoch: 4, train loss: 31.683059692382812, val loss: 40.265960693359375
2020-11-03 12:29:09,939 epoch: 5, train loss: 30.941320419311523, val 

snATACseq brca_train_gbm_val (2064, 19891) (5650, 19891)


2020-11-03 12:31:19,219 normalizing the expression counts for model training
2020-11-03 12:31:23,646 input dataset shape: (7714, 19891)
2020-11-03 12:31:23,648 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'T cells', 'Treg']
2020-11-03 12:31:23,649 possible cell types: [('Malignant', 1200), ('Microglia', 1000), ('T cells', 1000), ('Oligodendrocytes', 1000), ('Neuron', 1000), ('Fibroblast', 810), ('Endothelial', 229), ('B cell', 211), ('NK', 200), ('CD8 T cell', 200), ('CD4 T cell', 200), ('Treg', 200), ('Monocyte', 200), ('Dendritic', 200), ('Mast', 64)]
2020-11-03 12:31:25,526 training dataset shape: (4303, 19891)
2020-11-03 12:31:25,527 validation dataset shape: (3411, 19891)




2020-11-03 12:31:34,261 5 out of the last 76 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 12:31:36,445 epoch: 1, train loss: 40.19789505004883, val loss: 52.695533752441406
2020-11-03 12:31:46,012 epoch: 2, train loss: 40.18204879760742, val loss: 52.47862243652344
2020-11-03 12:31:55,717 epoch: 3, train loss: 39.665016174316406, val loss: 51.95933532714844
2020-11-03 12:32:05,381 epoch: 4, train loss: 39.574554443359375, val loss: 51.87075424194336
2020-11-03 12:32:14,981 epoch: 5, train loss: 39.232078552246094, val los

snATACseq gbm_train_brca_val (1316, 19891) (9028, 19891)


2020-11-03 12:35:16,496 normalizing the expression counts for model training
2020-11-03 12:35:22,380 input dataset shape: (10344, 19891)
2020-11-03 12:35:22,382 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'T cells', 'Treg']
2020-11-03 12:35:22,384 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('Endothelial', 1072), ('B cell', 1044), ('Monocyte', 1000), ('Treg', 1000), ('CD8 T cell', 1000), ('CD4 T cell', 1000), ('Dendritic', 776), ('NK', 230), ('Neuron', 200), ('Microglia', 200), ('T cells', 200), ('Oligodendrocytes', 200), ('Mast', 22)]
2020-11-03 12:35:24,941 training dataset shape: (5303, 19891)
2020-11-03 12:35:24,942 validation dataset shape: (5041, 19891)
2020-11-03 12:35:37,626 epoch: 1, train loss: 38.734806060791016, val loss: 25.552745819091797
2020-11-03 12:35:49,183 epoch: 2, train loss: 38.45798873901367, val loss: 2

snRNAseq brca_train_ccrcc_val (2455, 29175) (8605, 33538)


2020-11-03 12:40:09,366 normalizing the expression counts for model training
2020-11-03 12:40:18,786 input dataset shape: (11060, 29175)
2020-11-03 12:40:18,788 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 12:40:18,789 possible cell types: [('CD4 T cell', 1200), ('Endothelial', 1200), ('Malignant', 1200), ('NK', 1200), ('Monocyte', 1200), ('Fibroblast', 1200), ('CD8 T cell', 1046), ('Epithelial', 1000), ('Treg', 574), ('Dendritic', 345), ('Plasma', 295), ('Adipocyte', 200), ('B cell', 200), ('Mast', 200)]
2020-11-03 12:40:22,267 training dataset shape: (5192, 29175)
2020-11-03 12:40:22,268 validation dataset shape: (5868, 29175)
2020-11-03 12:40:39,299 epoch: 1, train loss: 36.86466979980469, val loss: 43.19028091430664
2020-11-03 12:40:54,906 epoch: 2, train loss: 36.16983413696289, val loss: 42.72496032714844
2020-11-03 12:41:10,338 e

snRNAseq brca_train_gbm_val (2455, 29175) (6810, 29748)


2020-11-03 12:46:15,542 normalizing the expression counts for model training
2020-11-03 12:46:23,225 input dataset shape: (9265, 28713)
2020-11-03 12:46:23,226 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 12:46:23,228 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('Microglia', 1000), ('Neuron', 1000), ('Oligodendrocytes', 1000), ('T cells', 1000), ('Endothelial', 581), ('B cell', 578), ('Monocyte', 251), ('CD4 T cell', 200), ('Treg', 200), ('NK', 200), ('Adipocyte', 200), ('Mast', 200), ('CD8 T cell', 200), ('Plasma', 200), ('Dendritic', 55)]
2020-11-03 12:46:26,250 training dataset shape: (4900, 28713)
2020-11-03 12:46:26,251 validation dataset shape: (4365, 28713)




2020-11-03 12:46:39,715 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 12:46:42,190 epoch: 1, train loss: 43.06745910644531, val loss: 47.67670822143555
2020-11-03 12:46:56,249 epoch: 2, train loss: 42.161346435546875, val loss: 47.07756805419922
2020-11-03 12:47:10,547 epoch: 3, train loss: 41.5523681640625, val loss: 46.73456573486328
2020-11-03 12:47:25,021 epoch: 4, train loss: 41.214176177978516, val loss: 46.42081069946289
2020-11-03 12:47:39,502 epoch: 5, train loss: 40.92921829223633, val loss: 

snRNAseq ccrcc_train_brca_val (2113, 33538) (9490, 29175)


2020-11-03 12:51:49,051 normalizing the expression counts for model training
2020-11-03 12:51:58,924 input dataset shape: (11603, 29175)
2020-11-03 12:51:58,926 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 12:51:58,928 possible cell types: [('Endothelial', 1200), ('Malignant', 1200), ('Fibroblast', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('Treg', 1200), ('CD4 T cell', 1200), ('Plasma', 1113), ('B cell', 1000), ('NK', 482), ('Dendritic', 217), ('Epithelial', 200), ('Mast', 118), ('Adipocyte', 73)]
2020-11-03 12:52:02,649 training dataset shape: (5243, 29175)
2020-11-03 12:52:02,650 validation dataset shape: (6360, 29175)
2020-11-03 12:52:19,010 epoch: 1, train loss: 37.67650604248047, val loss: 45.935394287109375
2020-11-03 12:52:34,415 epoch: 2, train loss: 37.148136138916016, val loss: 45.11798858642578
2020-11-03 12:52:49,974

snRNAseq ccrcc_train_gbm_val (2113, 33538) (6810, 29748)


2020-11-03 12:57:58,695 normalizing the expression counts for model training
2020-11-03 12:58:06,380 input dataset shape: (8923, 29748)
2020-11-03 12:58:06,382 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 12:58:06,384 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('Microglia', 1000), ('Neuron', 1000), ('Oligodendrocytes', 1000), ('T cells', 1000), ('Endothelial', 581), ('B cell', 378), ('Monocyte', 251), ('NK', 200), ('Dendritic', 200), ('CD8 T cell', 200), ('Treg', 200), ('Epithelial', 200), ('CD4 T cell', 200), ('Plasma', 113)]
2020-11-03 12:58:09,201 training dataset shape: (4727, 29748)
2020-11-03 12:58:09,202 validation dataset shape: (4196, 29748)




2020-11-03 12:58:21,688 5 out of the last 82 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 12:58:24,205 epoch: 1, train loss: 40.99190139770508, val loss: 46.22561264038086
2020-11-03 12:58:37,676 epoch: 2, train loss: 40.16083908081055, val loss: 45.779083251953125
2020-11-03 12:58:51,109 epoch: 3, train loss: 39.46126174926758, val loss: 44.96620178222656
2020-11-03 12:59:04,392 epoch: 4, train loss: 39.17843246459961, val loss: 44.30904769897461
2020-11-03 12:59:17,807 epoch: 5, train loss: 38.90746307373047, val loss: 

snRNAseq gbm_train_brca_val (1689, 29748) (9490, 29175)


2020-11-03 13:03:12,890 normalizing the expression counts for model training
2020-11-03 13:03:22,051 input dataset shape: (11179, 28713)
2020-11-03 13:03:22,053 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 13:03:22,055 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('B cell', 1200), ('Endothelial', 1200), ('Monocyte', 1089), ('CD4 T cell', 1000), ('CD8 T cell', 1000), ('Treg', 1000), ('Plasma', 1000), ('NK', 282), ('Neuron', 200), ('Oligodendrocytes', 200), ('T cells', 200), ('Microglia', 200), ('Mast', 118), ('Adipocyte', 73), ('Dendritic', 17)]
2020-11-03 13:03:25,745 training dataset shape: (5473, 28713)
2020-11-03 13:03:25,746 validation dataset shape: (5706, 28713)




2020-11-03 13:03:40,126 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 13:03:42,811 epoch: 1, train loss: 39.999752044677734, val loss: 42.07345962524414
2020-11-03 13:03:58,761 epoch: 2, train loss: 39.36997985839844, val loss: 41.3254508972168
2020-11-03 13:04:14,668 epoch: 3, train loss: 38.67570114135742, val loss: 40.84607696533203
2020-11-03 13:04:30,473 epoch: 4, train loss: 38.24592590332031, val loss: 40.582401275634766
2020-11-03 13:04:46,356 epoch: 5, train loss: 38.019981384277344, val loss:

snRNAseq gbm_train_ccrcc_val (1689, 29748) (8605, 33538)


2020-11-03 13:09:33,972 normalizing the expression counts for model training
2020-11-03 13:09:42,906 input dataset shape: (10294, 29748)
2020-11-03 13:09:42,908 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 13:09:42,910 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('Endothelial', 1200), ('Monocyte', 1089), ('NK', 1000), ('CD4 T cell', 1000), ('Epithelial', 1000), ('CD8 T cell', 846), ('Treg', 374), ('Dendritic', 290), ('Neuron', 200), ('B cell', 200), ('Oligodendrocytes', 200), ('T cells', 200), ('Microglia', 200), ('Plasma', 95)]
2020-11-03 13:09:46,239 training dataset shape: (5298, 29748)
2020-11-03 13:09:46,240 validation dataset shape: (4996, 29748)
2020-11-03 13:10:01,993 epoch: 1, train loss: 37.514373779296875, val loss: 46.715782165527344
2020-11-03 13:10:16,178 epoch: 2, trai

In [94]:
run_workflow_for_cross_datatype(adata_map, run_pollock_workflow, 'pollock', RESULTS_CROSS_DTYPE_DIR)

snRNAseq brca_train_brca_val (2600, 27131) (9028, 19891)


2020-11-03 13:28:03,274 normalizing the expression counts for model training
2020-11-03 13:28:09,083 input dataset shape: (11628, 17565)
2020-11-03 13:28:09,085 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 13:28:09,088 possible cell types: [('CD8 T cell', 1200), ('Endothelial', 1200), ('CD4 T cell', 1200), ('Treg', 1200), ('Fibroblast', 1200), ('Malignant', 1200), ('Monocyte', 1200), ('B cell', 1200), ('Dendritic', 976), ('NK', 430), ('Mast', 222), ('Plasma', 200), ('Erythrocyte', 200)]
2020-11-03 13:28:11,851 training dataset shape: (5236, 17565)
2020-11-03 13:28:11,852 validation dataset shape: (6392, 17565)
2020-11-03 13:28:23,842 epoch: 1, train loss: 30.368574142456055, val loss: 15.81157112121582
2020-11-03 13:28:34,406 epoch: 2, train loss: 29.463855743408203, val loss: 13.043060302734375
2020-11-03 13:28:44,993 epoch: 3, train loss: 29.2584

snRNAseq brca_train_gbm_val (2600, 27131) (5650, 19891)


2020-11-03 13:32:32,063 normalizing the expression counts for model training
2020-11-03 13:32:36,180 input dataset shape: (8250, 17565)
2020-11-03 13:32:36,181 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 13:32:36,183 possible cell types: [('Malignant', 1200), ('Microglia', 1000), ('T cells', 1000), ('Oligodendrocytes', 1000), ('Neuron', 1000), ('Fibroblast', 810), ('Endothelial', 229), ('B cell', 211), ('NK', 200), ('CD8 T cell', 200), ('CD4 T cell', 200), ('Treg', 200), ('Monocyte', 200), ('Mast', 200), ('Plasma', 200), ('Erythrocyte', 200), ('Dendritic', 200)]
2020-11-03 13:32:38,253 training dataset shape: (4703, 17565)
2020-11-03 13:32:38,254 validation dataset shape: (3547, 17565)




2020-11-03 13:32:47,033 5 out of the last 82 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 13:32:49,191 epoch: 1, train loss: 28.62729263305664, val loss: 16.150346755981445
2020-11-03 13:32:58,611 epoch: 2, train loss: 27.920495986938477, val loss: 14.999509811401367
2020-11-03 13:33:08,318 epoch: 3, train loss: 27.67705535888672, val loss: 14.845916748046875
2020-11-03 13:33:18,003 epoch: 4, train loss: 27.508895874023438, val loss: 14.541648864746094
2020-11-03 13:33:27,664 epoch: 5, train loss: 27.255340576171875, val 

snRNAseq cesc_train_brca_val (1941, 22928) (9028, 19891)


2020-11-03 13:36:25,107 normalizing the expression counts for model training
2020-11-03 13:36:30,214 input dataset shape: (10969, 16509)
2020-11-03 13:36:30,216 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 13:36:30,218 possible cell types: [('Fibroblast', 1200), ('Endothelial', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('CD4 T cell', 1200), ('Malignant', 1200), ('Treg', 1000), ('B cell', 1000), ('Dendritic', 776), ('NK', 430), ('Epithelial', 200), ('Plasma', 200), ('Mast', 111), ('Erythrocyte', 52)]
2020-11-03 13:36:32,807 training dataset shape: (5194, 16509)
2020-11-03 13:36:32,808 validation dataset shape: (5775, 16509)




2020-11-03 13:36:42,418 5 out of the last 20 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 13:36:43,775 epoch: 1, train loss: 29.95317268371582, val loss: 18.205394744873047
2020-11-03 13:36:53,720 epoch: 2, train loss: 29.498239517211914, val loss: 17.31381607055664
2020-11-03 13:37:03,580 epoch: 3, train loss: 29.217782974243164, val loss: 17.20269775390625
2020-11-03 13:37:13,574 epoch: 4, train loss: 28.957897186279297, val loss: 16.976694107055664
2020-11-03 13:37:23,464 epoch: 5, train loss: 28.697837829589844, val l

snRNAseq cesc_train_gbm_val (1941, 22928) (5650, 19891)


2020-11-03 13:40:41,462 normalizing the expression counts for model training
2020-11-03 13:40:45,077 input dataset shape: (7591, 16509)
2020-11-03 13:40:45,079 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells']
2020-11-03 13:40:45,081 possible cell types: [('Malignant', 1200), ('Microglia', 1000), ('T cells', 1000), ('Oligodendrocytes', 1000), ('Neuron', 1000), ('Fibroblast', 810), ('Endothelial', 229), ('Monocyte', 200), ('CD8 T cell', 200), ('CD4 T cell', 200), ('Epithelial', 200), ('Plasma', 200), ('NK', 200), ('Mast', 89), ('Erythrocyte', 52), ('B cell', 11)]
2020-11-03 13:40:46,824 training dataset shape: (4208, 16509)
2020-11-03 13:40:46,825 validation dataset shape: (3383, 16509)




2020-11-03 13:40:54,360 5 out of the last 71 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 13:40:54,457 6 out of the last 72 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 13:40:54,670 6 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 13:40:54,765 6 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 13:40:54,852 6 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 13:40:55,090 5 out of the last 15 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 13:40:57,160 epoch: 1, train loss: 30.6953067779541, val loss: 15.771256446838379
2020-11-03 13:41:05,660 epoch: 2, train loss: 29.74069595336914, val loss: 15.004144668579102
2020-11-03 13:41:14,111 epoch: 3, train loss: 29.400615692138672, val loss: 14.940890312194824
2020-11-03 13:41:22,480 epoch: 4, train loss: 29.25402069091797, val loss: 14.825430870056152
2020-11-03 13:41:31,153 epoch: 5, train loss: 29.075857162475586, val lo

snRNAseq hnscc_train_brca_val (2200, 26929) (9028, 19891)


2020-11-03 13:44:12,100 normalizing the expression counts for model training
2020-11-03 13:44:17,644 input dataset shape: (11228, 17615)
2020-11-03 13:44:17,646 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 13:44:17,648 possible cell types: [('B cell', 1200), ('CD4 T cell', 1200), ('Endothelial', 1200), ('Treg', 1200), ('CD8 T cell', 1200), ('Monocyte', 1200), ('Malignant', 1200), ('Fibroblast', 1000), ('Dendritic', 776), ('NK', 430), ('Mast', 222), ('Plasma', 200), ('Erythrocyte', 200)]
2020-11-03 13:44:20,338 training dataset shape: (5236, 17615)
2020-11-03 13:44:20,339 validation dataset shape: (5992, 17615)
2020-11-03 13:44:31,569 epoch: 1, train loss: 30.267385482788086, val loss: 9.726682662963867
2020-11-03 13:44:41,797 epoch: 2, train loss: 29.476022720336914, val loss: 8.399194717407227
2020-11-03 13:44:52,249 epoch: 3, train loss: 29.22235

snRNAseq hnscc_train_gbm_val (2200, 26929) (5650, 19891)


2020-11-03 13:48:37,159 normalizing the expression counts for model training
2020-11-03 13:48:41,043 input dataset shape: (7850, 17615)
2020-11-03 13:48:41,044 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 13:48:41,046 possible cell types: [('Malignant', 1200), ('Microglia', 1000), ('T cells', 1000), ('Oligodendrocytes', 1000), ('Neuron', 1000), ('Fibroblast', 610), ('Endothelial', 229), ('B cell', 211), ('CD4 T cell', 200), ('Treg', 200), ('Plasma', 200), ('CD8 T cell', 200), ('Monocyte', 200), ('Mast', 200), ('NK', 200), ('Erythrocyte', 200)]
2020-11-03 13:48:42,935 training dataset shape: (4363, 17615)
2020-11-03 13:48:42,936 validation dataset shape: (3487, 17615)




2020-11-03 13:48:51,176 5 out of the last 77 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 13:48:53,149 epoch: 1, train loss: 29.380186080932617, val loss: 10.799787521362305
2020-11-03 13:49:01,876 epoch: 2, train loss: 28.012439727783203, val loss: 9.421533584594727
2020-11-03 13:49:11,204 epoch: 3, train loss: 27.88461685180664, val loss: 9.340560913085938
2020-11-03 13:49:20,127 epoch: 4, train loss: 27.628328323364258, val loss: 9.252691268920898
2020-11-03 13:49:29,202 epoch: 5, train loss: 27.415414810180664, val lo

snRNAseq melanoma_train_brca_val (2000, 23452) (9028, 19891)


2020-11-03 13:52:18,487 normalizing the expression counts for model training
2020-11-03 13:52:23,599 input dataset shape: (11028, 16275)
2020-11-03 13:52:23,601 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 13:52:23,603 possible cell types: [('Malignant', 1200), ('Treg', 1200), ('B cell', 1200), ('CD8 T cell', 1200), ('Fibroblast', 1200), ('CD4 T cell', 1200), ('Monocyte', 1200), ('Endothelial', 1000), ('Dendritic', 976), ('NK', 430), ('Plasma', 200), ('Mast', 22)]
2020-11-03 13:52:26,013 training dataset shape: (4931, 16275)
2020-11-03 13:52:26,014 validation dataset shape: (6097, 16275)
2020-11-03 13:52:36,210 epoch: 1, train loss: 30.0792179107666, val loss: 17.59044647216797
2020-11-03 13:52:45,544 epoch: 2, train loss: 29.0787410736084, val loss: 13.52365779876709
2020-11-03 13:52:55,016 epoch: 3, train loss: 28.73115348815918, val loss: 13.367280960083008
20

snRNAseq melanoma_train_gbm_val (2000, 23452) (5650, 19891)


2020-11-03 13:56:19,107 normalizing the expression counts for model training
2020-11-03 13:56:22,626 input dataset shape: (7650, 16275)
2020-11-03 13:56:22,627 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 13:56:22,628 possible cell types: [('Malignant', 1200), ('Microglia', 1000), ('T cells', 1000), ('Oligodendrocytes', 1000), ('Neuron', 1000), ('Fibroblast', 810), ('B cell', 211), ('Treg', 200), ('Plasma', 200), ('CD8 T cell', 200), ('CD4 T cell', 200), ('NK', 200), ('Dendritic', 200), ('Monocyte', 200), ('Endothelial', 29)]
2020-11-03 13:56:24,321 training dataset shape: (4248, 16275)
2020-11-03 13:56:24,321 validation dataset shape: (3402, 16275)




2020-11-03 13:56:32,836 5 out of the last 12 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 13:56:32,928 6 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 13:56:34,640 epoch: 1, train loss: 28.923145294189453, val loss: 17.456485748291016
2020-11-03 13:56:43,182 epoch: 2, train loss: 27.62527847290039, val loss: 14.580802917480469
2020-11-03 13:56:51,637 epoch: 3, train loss: 27.489667892456055, val loss: 14.340185165405273
2020-11-03 13:57:00,072 epoch: 4, train loss: 27.24112319946289, val loss: 14.199554443359375
2020-11-03 13:57:08,639 epoch: 5, train loss: 26.94217300415039, val l

snRNAseq pbmc_train_brca_val (940, 32738) (9028, 19891)


2020-11-03 13:59:45,412 normalizing the expression counts for model training
2020-11-03 13:59:50,690 input dataset shape: (9968, 18919)
2020-11-03 13:59:50,691 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Treg']
2020-11-03 13:59:50,693 possible cell types: [('CD4 T cell', 1200), ('B cell', 1200), ('CD8 T cell', 1200), ('Monocyte', 1200), ('Malignant', 1000), ('Endothelial', 1000), ('Fibroblast', 1000), ('Treg', 1000), ('Dendritic', 806), ('NK', 330), ('Mast', 22), ('Megakaryocyte', 10)]
2020-11-03 13:59:52,968 training dataset shape: (4745, 18919)
2020-11-03 13:59:52,969 validation dataset shape: (5223, 18919)




2020-11-03 14:00:02,667 5 out of the last 18 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 14:00:03,734 epoch: 1, train loss: 34.949344635009766, val loss: 9.493606567382812
2020-11-03 14:00:13,537 epoch: 2, train loss: 34.41702651977539, val loss: 8.968344688415527
2020-11-03 14:00:23,402 epoch: 3, train loss: 34.13623046875, val loss: 6.908611297607422
2020-11-03 14:00:33,202 epoch: 4, train loss: 33.52253341674805, val loss: 6.787102699279785
2020-11-03 14:00:43,353 epoch: 5, train loss: 33.36555480957031, val loss: 6.7

snRNAseq pbmc_train_gbm_val (940, 32738) (5650, 19891)


2020-11-03 14:04:00,472 normalizing the expression counts for model training
2020-11-03 14:04:04,091 input dataset shape: (6590, 18919)
2020-11-03 14:04:04,092 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Megakaryocyte', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-03 14:04:04,094 possible cell types: [('Malignant', 1000), ('Microglia', 1000), ('T cells', 1000), ('Oligodendrocytes', 1000), ('Neuron', 1000), ('Fibroblast', 610), ('B cell', 211), ('CD4 T cell', 200), ('CD8 T cell', 200), ('Monocyte', 200), ('NK', 100), ('Dendritic', 30), ('Endothelial', 29), ('Megakaryocyte', 10)]
2020-11-03 14:04:05,611 training dataset shape: (3569, 18919)
2020-11-03 14:04:05,612 validation dataset shape: (3021, 18919)




2020-11-03 14:04:12,759 5 out of the last 65 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 14:04:12,847 5 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 14:04:12,937 6 out of the last 12 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 14:04:13,158 6 out of the last 15 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 14:04:13,244 6 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 14:04:13,536 5 out of the last 14 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 14:04:14,600 epoch: 1, train loss: 33.22473907470703, val loss: 10.309225082397461
2020-11-03 14:04:22,217 epoch: 2, train loss: 32.998104095458984, val loss: 9.775554656982422
2020-11-03 14:04:29,840 epoch: 3, train loss: 32.31700134277344, val loss: 7.4095964431762695
2020-11-03 14:04:37,468 epoch: 4, train loss: 31.997758865356445, val loss: 7.134622097015381
2020-11-03 14:04:45,103 epoch: 5, train loss: 31.86823081970215, val los

snRNAseq pdac_train_brca_val (3296, 28756) (9028, 19891)


2020-11-03 14:07:12,271 normalizing the expression counts for model training
2020-11-03 14:07:18,558 input dataset shape: (12324, 17904)
2020-11-03 14:07:18,560 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 14:07:18,562 possible cell types: [('Treg', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('Malignant', 1200), ('Fibroblast', 1200), ('Endothelial', 1200), ('B cell', 1200), ('CD4 T cell', 1200), ('Dendritic', 976), ('NK', 430), ('Mast', 222), ('Epithelial', 200), ('Plasma', 200), ('Erythrocyte', 200), ('Islet', 200), ('Acinar', 200), ('Tuft', 96)]
2020-11-03 14:07:21,821 training dataset shape: (5768, 17904)
2020-11-03 14:07:21,822 validation dataset shape: (6556, 17904)
2020-11-03 14:07:33,822 epoch: 1, train loss: 27.683120727539062, val loss: 15.53511905670166
2020-11-03 14:07:44,501 epoch: 2, train

snRNAseq pdac_train_gbm_val (3296, 28756) (5650, 19891)


2020-11-03 14:11:47,271 normalizing the expression counts for model training
2020-11-03 14:11:51,752 input dataset shape: (8946, 17904)
2020-11-03 14:11:51,754 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg', 'Tuft']
2020-11-03 14:11:51,756 possible cell types: [('Malignant', 1200), ('Microglia', 1000), ('T cells', 1000), ('Oligodendrocytes', 1000), ('Neuron', 1000), ('Fibroblast', 810), ('Endothelial', 229), ('B cell', 211), ('Treg', 200), ('Dendritic', 200), ('Monocyte', 200), ('NK', 200), ('Epithelial', 200), ('CD8 T cell', 200), ('Plasma', 200), ('Erythrocyte', 200), ('Islet', 200), ('Acinar', 200), ('Mast', 200), ('CD4 T cell', 200), ('Tuft', 96)]
2020-11-03 14:11:54,196 training dataset shape: (5251, 17904)
2020-11-03 14:11:54,197 validation dataset shape: (3695, 17904)




2020-11-03 14:12:03,045 5 out of the last 91 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 14:12:03,173 5 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 14:12:05,918 epoch: 1, train loss: 27.18524932861328, val loss: 16.36088752746582
2020-11-03 14:12:16,037 epoch: 2, train loss: 26.76171875, val loss: 15.836007118225098
2020-11-03 14:12:26,050 epoch: 3, train loss: 26.53240394592285, val loss: 15.52204418182373
2020-11-03 14:12:36,046 epoch: 4, train loss: 26.17377471923828, val loss: 15.219148635864258
2020-11-03 14:12:45,951 epoch: 5, train loss: 25.924711227416992, val loss: 14.9

snRNAseq brca_train_brca_val (2600, 27131) (9490, 29175)


2020-11-03 14:15:51,771 normalizing the expression counts for model training
2020-11-03 14:16:00,502 input dataset shape: (12090, 25674)
2020-11-03 14:16:00,504 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 14:16:00,506 possible cell types: [('CD8 T cell', 1200), ('Endothelial', 1200), ('CD4 T cell', 1200), ('Treg', 1200), ('Fibroblast', 1200), ('Malignant', 1200), ('Monocyte', 1200), ('B cell', 1200), ('Plasma', 1200), ('NK', 482), ('Mast', 318), ('Dendritic', 217), ('Erythrocyte', 200), ('Adipocyte', 73)]
2020-11-03 14:16:04,165 training dataset shape: (5368, 25674)
2020-11-03 14:16:04,166 validation dataset shape: (6722, 25674)
2020-11-03 14:16:18,919 epoch: 1, train loss: 36.376487731933594, val loss: 27.205547332763672
2020-11-03 14:16:33,159 epoch: 2, train loss: 35.737152099609375, val loss: 26.302650451660156
2020-11-03 14:16:47

snRNAseq brca_train_ccrcc_val (2600, 27131) (8605, 33538)


2020-11-03 14:21:26,936 normalizing the expression counts for model training
2020-11-03 14:21:35,671 input dataset shape: (11205, 27131)
2020-11-03 14:21:35,673 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 14:21:35,675 possible cell types: [('NK', 1200), ('Endothelial', 1200), ('CD4 T cell', 1200), ('Fibroblast', 1200), ('Malignant', 1200), ('Monocyte', 1200), ('CD8 T cell', 1046), ('Epithelial', 1000), ('Treg', 574), ('Dendritic', 490), ('Plasma', 295), ('B cell', 200), ('Mast', 200), ('Erythrocyte', 200)]
2020-11-03 14:21:39,094 training dataset shape: (5262, 27131)
2020-11-03 14:21:39,095 validation dataset shape: (5943, 27131)
2020-11-03 14:21:54,703 epoch: 1, train loss: 34.251487731933594, val loss: 34.031707763671875
2020-11-03 14:22:09,295 epoch: 2, train loss: 33.47380447387695, val loss: 33.02424621582031
2020-11-03 14:22:23

snRNAseq brca_train_gbm_val (2600, 27131) (6810, 29748)


2020-11-03 14:27:17,431 normalizing the expression counts for model training
2020-11-03 14:27:24,349 input dataset shape: (9410, 25705)
2020-11-03 14:27:24,351 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 14:27:24,353 possible cell types: [('Fibroblast', 1200), ('Malignant', 1200), ('Microglia', 1000), ('Neuron', 1000), ('Oligodendrocytes', 1000), ('T cells', 1000), ('Endothelial', 581), ('B cell', 578), ('Monocyte', 251), ('NK', 200), ('CD8 T cell', 200), ('CD4 T cell', 200), ('Treg', 200), ('Mast', 200), ('Plasma', 200), ('Erythrocyte', 200), ('Dendritic', 200)]
2020-11-03 14:27:27,198 training dataset shape: (5029, 25705)
2020-11-03 14:27:27,199 validation dataset shape: (4381, 25705)
2020-11-03 14:27:41,911 epoch: 1, train loss: 38.98710250854492, val loss: 34.86935043334961
2020-11-03 14:27

snRNAseq cesc_train_brca_val (1941, 22928) (9490, 29175)


2020-11-03 14:32:36,797 normalizing the expression counts for model training
2020-11-03 14:32:43,836 input dataset shape: (11431, 22001)
2020-11-03 14:32:43,838 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 14:32:43,840 possible cell types: [('Fibroblast', 1200), ('Endothelial', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('CD4 T cell', 1200), ('Plasma', 1200), ('Malignant', 1200), ('B cell', 1000), ('Treg', 1000), ('NK', 482), ('Mast', 207), ('Epithelial', 200), ('Adipocyte', 73), ('Erythrocyte', 52), ('Dendritic', 17)]
2020-11-03 14:32:47,116 training dataset shape: (5201, 22001)
2020-11-03 14:32:47,117 validation dataset shape: (6230, 22001)




2020-11-03 14:32:58,606 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 14:32:58,896 5 out of the last 14 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 14:32:58,987 6 out of the last 15 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 14:32:59,258 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 14:33:00,683 epoch: 1, train loss: 34.416954040527344, val loss: 34.34684753417969
2020-11-03 14:33:12,700 epoch: 2, train loss: 33.62413787841797, val loss: 33.501564025878906
2020-11-03 14:33:25,245 epoch: 3, train loss: 33.180091857910156, val loss: 33.02786636352539
2020-11-03 14:33:37,531 epoch: 4, train loss: 32.811309814453125, val loss: 32.69691467285156
2020-11-03 14:33:49,830 epoch: 5, train loss: 32.57563018798828, val los

snRNAseq cesc_train_ccrcc_val (1941, 22928) (8605, 33538)


2020-11-03 14:37:33,527 normalizing the expression counts for model training
2020-11-03 14:37:40,442 input dataset shape: (10546, 22919)
2020-11-03 14:37:40,444 possible cell types: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 14:37:40,446 possible cell types: [('Fibroblast', 1200), ('Endothelial', 1200), ('Monocyte', 1200), ('CD4 T cell', 1200), ('Epithelial', 1200), ('Malignant', 1200), ('NK', 1200), ('CD8 T cell', 1046), ('Treg', 374), ('Plasma', 295), ('Dendritic', 290), ('Mast', 89), ('Erythrocyte', 52)]
2020-11-03 14:37:43,296 training dataset shape: (4763, 22919)
2020-11-03 14:37:43,297 validation dataset shape: (5783, 22919)
2020-11-03 14:37:55,910 epoch: 1, train loss: 31.883825302124023, val loss: 37.664405822753906
2020-11-03 14:38:07,421 epoch: 2, train loss: 31.062040328979492, val loss: 36.926422119140625
2020-11-03 14:38:19,003 epoch: 3, train loss: 

snRNAseq cesc_train_gbm_val (1941, 22928) (6810, 29748)


2020-11-03 14:42:17,760 normalizing the expression counts for model training
2020-11-03 14:42:23,160 input dataset shape: (8751, 21981)
2020-11-03 14:42:23,161 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells']
2020-11-03 14:42:23,163 possible cell types: [('Fibroblast', 1200), ('Malignant', 1200), ('Microglia', 1000), ('Neuron', 1000), ('Oligodendrocytes', 1000), ('T cells', 1000), ('Endothelial', 581), ('B cell', 378), ('Monocyte', 251), ('CD8 T cell', 200), ('CD4 T cell', 200), ('Epithelial', 200), ('Plasma', 200), ('NK', 200), ('Mast', 89), ('Erythrocyte', 52)]
2020-11-03 14:42:25,485 training dataset shape: (4607, 21981)
2020-11-03 14:42:25,486 validation dataset shape: (4144, 21981)




2020-11-03 14:42:35,261 5 out of the last 80 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 14:42:35,446 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 14:42:35,543 5 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 14:42:35,632 6 out of the last 12 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 14:42:35,908 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 14:42:37,607 epoch: 1, train loss: 37.35302734375, val loss: 33.23747253417969
2020-11-03 14:42:48,490 epoch: 2, train loss: 36.4340934753418, val loss: 32.823829650878906
2020-11-03 14:42:59,584 epoch: 3, train loss: 35.81965637207031, val loss: 32.364112854003906
2020-11-03 14:43:10,469 epoch: 4, train loss: 35.462276458740234, val loss: 32.06382369995117
2020-11-03 14:43:21,348 epoch: 5, train loss: 35.17479705810547, val loss: 31

snRNAseq hnscc_train_brca_val (2200, 26929) (9490, 29175)


2020-11-03 14:46:39,017 normalizing the expression counts for model training
2020-11-03 14:46:47,388 input dataset shape: (11690, 25299)
2020-11-03 14:46:47,389 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 14:46:47,391 possible cell types: [('B cell', 1200), ('CD4 T cell', 1200), ('Endothelial', 1200), ('Treg', 1200), ('Plasma', 1200), ('CD8 T cell', 1200), ('Monocyte', 1200), ('Malignant', 1200), ('Fibroblast', 1000), ('NK', 482), ('Mast', 318), ('Erythrocyte', 200), ('Adipocyte', 73), ('Dendritic', 17)]
2020-11-03 14:46:50,857 training dataset shape: (5205, 25299)
2020-11-03 14:46:50,858 validation dataset shape: (6485, 25299)




2020-11-03 14:47:03,573 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 14:47:06,201 epoch: 1, train loss: 35.200496673583984, val loss: 18.56930923461914
2020-11-03 14:47:20,304 epoch: 2, train loss: 34.46210479736328, val loss: 17.88840103149414
2020-11-03 14:47:34,066 epoch: 3, train loss: 34.034420013427734, val loss: 17.709951400756836
2020-11-03 14:47:48,219 epoch: 4, train loss: 33.60197067260742, val loss: 17.55510902404785
2020-11-03 14:48:02,139 epoch: 5, train loss: 33.352210998535156, val los

snRNAseq hnscc_train_ccrcc_val (2200, 26929) (8605, 33538)


2020-11-03 14:52:14,442 normalizing the expression counts for model training
2020-11-03 14:52:22,738 input dataset shape: (10805, 26918)
2020-11-03 14:52:22,740 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 14:52:22,742 possible cell types: [('CD4 T cell', 1200), ('Endothelial', 1200), ('Monocyte', 1200), ('Malignant', 1200), ('NK', 1200), ('CD8 T cell', 1046), ('Fibroblast', 1000), ('Epithelial', 1000), ('Treg', 574), ('Plasma', 295), ('Dendritic', 290), ('B cell', 200), ('Mast', 200), ('Erythrocyte', 200)]
2020-11-03 14:52:26,228 training dataset shape: (5189, 26918)
2020-11-03 14:52:26,229 validation dataset shape: (5616, 26918)
2020-11-03 14:52:41,239 epoch: 1, train loss: 32.91059112548828, val loss: 20.24225616455078
2020-11-03 14:52:55,082 epoch: 2, train loss: 32.08892822265625, val loss: 19.596050262451172
2020-11-03 14:53:09,

snRNAseq hnscc_train_gbm_val (2200, 26929) (6810, 29748)


2020-11-03 14:57:50,355 normalizing the expression counts for model training
2020-11-03 14:57:56,850 input dataset shape: (9010, 25389)
2020-11-03 14:57:56,851 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 14:57:56,853 possible cell types: [('Malignant', 1200), ('Microglia', 1000), ('Neuron', 1000), ('Fibroblast', 1000), ('Oligodendrocytes', 1000), ('T cells', 1000), ('Endothelial', 581), ('B cell', 578), ('Monocyte', 251), ('CD4 T cell', 200), ('Treg', 200), ('Plasma', 200), ('CD8 T cell', 200), ('Mast', 200), ('NK', 200), ('Erythrocyte', 200)]
2020-11-03 14:57:59,485 training dataset shape: (4880, 25389)
2020-11-03 14:57:59,486 validation dataset shape: (4130, 25389)
2020-11-03 14:58:13,822 epoch: 1, train loss: 38.07807159423828, val loss: 21.228343963623047
2020-11-03 14:58:27,037 epoch: 2, train loss: 36

snRNAseq melanoma_train_brca_val (2000, 23452) (9490, 29175)


2020-11-03 15:02:58,969 normalizing the expression counts for model training
2020-11-03 15:03:05,762 input dataset shape: (11490, 21018)
2020-11-03 15:03:05,764 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 15:03:05,766 possible cell types: [('Malignant', 1200), ('Treg', 1200), ('Plasma', 1200), ('B cell', 1200), ('CD8 T cell', 1200), ('Fibroblast', 1200), ('CD4 T cell', 1200), ('Monocyte', 1200), ('Endothelial', 1000), ('NK', 482), ('Dendritic', 217), ('Mast', 118), ('Adipocyte', 73)]
2020-11-03 15:03:08,844 training dataset shape: (5083, 21018)
2020-11-03 15:03:08,845 validation dataset shape: (6407, 21018)
2020-11-03 15:03:21,297 epoch: 1, train loss: 35.03501510620117, val loss: 30.52936363220215
2020-11-03 15:03:32,608 epoch: 2, train loss: 34.01557922363281, val loss: 28.506433486938477
2020-11-03 15:03:44,002 epoch: 3, train loss: 33.5922737121

snRNAseq melanoma_train_ccrcc_val (2000, 23452) (8605, 33538)


2020-11-03 15:07:33,334 normalizing the expression counts for model training
2020-11-03 15:07:39,956 input dataset shape: (10605, 21975)
2020-11-03 15:07:39,957 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 15:07:39,959 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('CD4 T cell', 1200), ('NK', 1200), ('Monocyte', 1200), ('CD8 T cell', 1046), ('Endothelial', 1000), ('Epithelial', 1000), ('Treg', 574), ('Dendritic', 490), ('Plasma', 295), ('B cell', 200)]
2020-11-03 15:07:42,688 training dataset shape: (4954, 21975)
2020-11-03 15:07:42,689 validation dataset shape: (5651, 21975)
2020-11-03 15:07:55,200 epoch: 1, train loss: 32.882286071777344, val loss: 38.086570739746094
2020-11-03 15:08:06,727 epoch: 2, train loss: 31.693910598754883, val loss: 36.25878143310547
2020-11-03 15:08:18,265 epoch: 3, train loss: 31.288482666015625, val loss: 36

snRNAseq melanoma_train_gbm_val (2000, 23452) (6810, 29748)


2020-11-03 15:12:10,872 normalizing the expression counts for model training
2020-11-03 15:12:16,086 input dataset shape: (8810, 21069)
2020-11-03 15:12:16,088 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 15:12:16,090 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('Microglia', 1000), ('Neuron', 1000), ('Oligodendrocytes', 1000), ('T cells', 1000), ('B cell', 578), ('Endothelial', 381), ('Monocyte', 251), ('Treg', 200), ('Plasma', 200), ('CD8 T cell', 200), ('CD4 T cell', 200), ('NK', 200), ('Dendritic', 200)]
2020-11-03 15:12:18,437 training dataset shape: (4651, 21069)
2020-11-03 15:12:18,438 validation dataset shape: (4159, 21069)
2020-11-03 15:12:30,427 epoch: 1, train loss: 37.34528732299805, val loss: 35.06938171386719
2020-11-03 15:12:41,166 epoch: 2, train loss: 36.01210403442383, val loss: 3

snRNAseq pbmc_train_brca_val (940, 32738) (9490, 29175)


2020-11-03 15:16:24,035 normalizing the expression counts for model training
2020-11-03 15:16:29,635 input dataset shape: (10430, 18731)
2020-11-03 15:16:29,637 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 15:16:29,639 possible cell types: [('CD4 T cell', 1200), ('B cell', 1200), ('CD8 T cell', 1200), ('Monocyte', 1200), ('Malignant', 1000), ('Fibroblast', 1000), ('Endothelial', 1000), ('Treg', 1000), ('Plasma', 1000), ('NK', 382), ('Mast', 118), ('Adipocyte', 73), ('Dendritic', 47), ('Megakaryocyte', 10)]
2020-11-03 15:16:32,243 training dataset shape: (4940, 18731)
2020-11-03 15:16:32,244 validation dataset shape: (5490, 18731)




2020-11-03 15:16:41,901 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 15:16:43,712 epoch: 1, train loss: 31.333980560302734, val loss: 15.819563865661621
2020-11-03 15:16:54,024 epoch: 2, train loss: 30.58341407775879, val loss: 13.568857192993164
2020-11-03 15:17:04,331 epoch: 3, train loss: 30.091821670532227, val loss: 12.941605567932129
2020-11-03 15:17:14,546 epoch: 4, train loss: 29.699546813964844, val loss: 12.843783378601074
2020-11-03 15:17:24,807 epoch: 5, train loss: 29.55184555053711, val 

snRNAseq pbmc_train_ccrcc_val (940, 32738) (8605, 33538)


2020-11-03 15:20:31,989 normalizing the expression counts for model training
2020-11-03 15:20:37,591 input dataset shape: (9545, 20453)
2020-11-03 15:20:37,592 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 15:20:37,594 possible cell types: [('CD4 T cell', 1200), ('Monocyte', 1200), ('NK', 1100), ('CD8 T cell', 1046), ('Malignant', 1000), ('Fibroblast', 1000), ('Endothelial', 1000), ('Epithelial', 1000), ('Treg', 374), ('Dendritic', 320), ('B cell', 200), ('Plasma', 95), ('Megakaryocyte', 10)]
2020-11-03 15:20:39,925 training dataset shape: (4699, 20453)
2020-11-03 15:20:39,926 validation dataset shape: (4846, 20453)
2020-11-03 15:20:51,209 epoch: 1, train loss: 29.46137046813965, val loss: 21.639455795288086
2020-11-03 15:21:01,551 epoch: 2, train loss: 28.584413528442383, val loss: 18.73427963256836
2020-11-03 15:21:11,737 epoch: 3, train l

snRNAseq pbmc_train_gbm_val (940, 32738) (6810, 29748)


2020-11-03 15:24:40,516 normalizing the expression counts for model training
2020-11-03 15:24:44,725 input dataset shape: (7750, 18949)
2020-11-03 15:24:44,727 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Megakaryocyte', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-03 15:24:44,729 possible cell types: [('Microglia', 1000), ('Malignant', 1000), ('Neuron', 1000), ('Fibroblast', 1000), ('Oligodendrocytes', 1000), ('T cells', 1000), ('B cell', 578), ('Endothelial', 381), ('Monocyte', 251), ('CD4 T cell', 200), ('CD8 T cell', 200), ('NK', 100), ('Dendritic', 30), ('Megakaryocyte', 10)]
2020-11-03 15:24:46,543 training dataset shape: (4160, 18949)
2020-11-03 15:24:46,544 validation dataset shape: (3590, 18949)




2020-11-03 15:24:54,396 5 out of the last 75 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 15:24:54,758 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 15:24:56,214 epoch: 1, train loss: 33.85918045043945, val loss: 18.957477569580078
2020-11-03 15:25:04,996 epoch: 2, train loss: 32.859535217285156, val loss: 15.825884819030762
2020-11-03 15:25:13,725 epoch: 3, train loss: 32.08671569824219, val loss: 15.593043327331543
2020-11-03 15:25:22,615 epoch: 4, train loss: 31.76056480407715, val loss: 15.547697067260742
2020-11-03 15:25:31,304 epoch: 5, train loss: 31.526390075683594, val l

snRNAseq pdac_train_brca_val (3296, 28756) (9490, 29175)


2020-11-03 15:28:10,361 normalizing the expression counts for model training
2020-11-03 15:28:20,195 input dataset shape: (12786, 26783)
2020-11-03 15:28:20,197 possible cell types: ['Acinar', 'Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 15:28:20,199 possible cell types: [('Treg', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('Plasma', 1200), ('Malignant', 1200), ('Fibroblast', 1200), ('Endothelial', 1200), ('B cell', 1200), ('CD4 T cell', 1200), ('NK', 482), ('Mast', 318), ('Dendritic', 217), ('Epithelial', 200), ('Erythrocyte', 200), ('Islet', 200), ('Acinar', 200), ('Tuft', 96), ('Adipocyte', 73)]
2020-11-03 15:28:24,576 training dataset shape: (5901, 26783)
2020-11-03 15:28:24,577 validation dataset shape: (6885, 26783)




2020-11-03 15:28:39,275 5 out of the last 99 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 15:28:42,326 epoch: 1, train loss: 35.213008880615234, val loss: 29.897964477539062
2020-11-03 15:28:58,728 epoch: 2, train loss: 34.56834411621094, val loss: 28.977693557739258
2020-11-03 15:29:15,097 epoch: 3, train loss: 34.03038024902344, val loss: 28.479427337646484
2020-11-03 15:29:31,790 epoch: 4, train loss: 33.55194091796875, val loss: 27.989574432373047
2020-11-03 15:29:48,437 epoch: 5, train loss: 33.278297424316406, val l

snRNAseq pdac_train_ccrcc_val (3296, 28756) (8605, 33538)


2020-11-03 15:34:39,798 normalizing the expression counts for model training
2020-11-03 15:34:49,577 input dataset shape: (11901, 28756)
2020-11-03 15:34:49,579 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 15:34:49,581 possible cell types: [('Monocyte', 1200), ('NK', 1200), ('Epithelial', 1200), ('Malignant', 1200), ('Fibroblast', 1200), ('Endothelial', 1200), ('CD4 T cell', 1200), ('CD8 T cell', 1046), ('Treg', 574), ('Dendritic', 490), ('Plasma', 295), ('Erythrocyte', 200), ('Islet', 200), ('Acinar', 200), ('B cell', 200), ('Mast', 200), ('Tuft', 96)]
2020-11-03 15:34:53,559 training dataset shape: (5641, 28756)
2020-11-03 15:34:53,561 validation dataset shape: (6260, 28756)
2020-11-03 15:35:09,721 epoch: 1, train loss: 32.937591552734375, val loss: 36.180118560791016
2020-11-03 15:35:25,081 epoch: 2, trai

snRNAseq pdac_train_gbm_val (3296, 28756) (6810, 29748)


2020-11-03 15:40:43,090 normalizing the expression counts for model training
2020-11-03 15:40:50,731 input dataset shape: (10106, 27015)
2020-11-03 15:40:50,733 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg', 'Tuft']
2020-11-03 15:40:50,735 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('Microglia', 1000), ('Neuron', 1000), ('Oligodendrocytes', 1000), ('T cells', 1000), ('Endothelial', 581), ('B cell', 578), ('Monocyte', 251), ('Treg', 200), ('Dendritic', 200), ('NK', 200), ('Epithelial', 200), ('CD8 T cell', 200), ('Plasma', 200), ('Erythrocyte', 200), ('Islet', 200), ('Acinar', 200), ('Mast', 200), ('CD4 T cell', 200), ('Tuft', 96)]
2020-11-03 15:40:54,090 training dataset shape: (5565, 27015)
2020-11-03 15:40:54,092 validation dataset shape: (4541, 27015)



2020-11-03 15:41:07,933 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 15:41:11,197 epoch: 1, train loss: 37.50226974487305, val loss: 34.10360336303711
2020-11-03 15:41:26,848 epoch: 2, train loss: 36.55294418334961, val loss: 33.4132080078125
2020-11-03 15:41:42,665 epoch: 3, train loss: 35.90336227416992, val loss: 32.740753173828125
2020-11-03 15:41:58,489 epoch: 4, train loss: 35.538143157958984, val loss: 32.14366912841797
2020-11-03 15:42:14,175 epoch: 5, train loss: 35.27581787109375, val loss: 

snRNAseq brca_train_brca_val (2064, 19891) (11253, 27131)


2020-11-03 15:46:49,924 normalizing the expression counts for model training
2020-11-03 15:46:56,465 input dataset shape: (13317, 17565)
2020-11-03 15:46:56,467 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 15:46:56,470 possible cell types: [('B cell', 1200), ('Endothelial', 1200), ('Malignant', 1200), ('Fibroblast', 1200), ('NK', 1200), ('CD8 T cell', 1200), ('CD4 T cell', 1200), ('Treg', 1200), ('Monocyte', 1200), ('Plasma', 1000), ('Mast', 610), ('Dendritic', 592), ('Erythrocyte', 315)]
2020-11-03 15:46:59,832 training dataset shape: (5842, 17565)
2020-11-03 15:46:59,833 validation dataset shape: (7475, 17565)
2020-11-03 15:47:12,197 epoch: 1, train loss: 21.900548934936523, val loss: 66.90171813964844
2020-11-03 15:47:23,594 epoch: 2, train loss: 21.609745025634766, val loss: 66.69325256347656
2020-11-03 15:47:35,122 epoch: 3, train loss: 21.349

snRNAseq brca_train_cesc_val (2064, 19891) (8449, 22928)


2020-11-03 15:51:29,628 normalizing the expression counts for model training
2020-11-03 15:51:34,524 input dataset shape: (10513, 16509)
2020-11-03 15:51:34,526 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 15:51:34,528 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('NK', 1200), ('CD8 T cell', 1200), ('Monocyte', 1200), ('CD4 T cell', 1118), ('Epithelial', 1000), ('Plasma', 935), ('Endothelial', 732), ('B cell', 200), ('Treg', 200), ('Dendritic', 200), ('Mast', 113), ('Erythrocyte', 15)]
2020-11-03 15:51:37,271 training dataset shape: (5061, 16509)
2020-11-03 15:51:37,272 validation dataset shape: (5452, 16509)
2020-11-03 15:51:48,636 epoch: 1, train loss: 26.561389923095703, val loss: 58.81471633911133
2020-11-03 15:51:58,432 epoch: 2, train loss: 26.30562400817871, val loss: 58.5287971496582
2020-11-03 15:52:08,474

snRNAseq brca_train_hnscc_val (2064, 19891) (10288, 26929)


2020-11-03 15:55:27,531 normalizing the expression counts for model training
2020-11-03 15:55:33,773 input dataset shape: (12352, 17615)
2020-11-03 15:55:33,775 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 15:55:33,777 possible cell types: [('B cell', 1200), ('Endothelial', 1200), ('Malignant', 1200), ('NK', 1200), ('CD8 T cell', 1200), ('CD4 T cell', 1200), ('Treg', 1200), ('Monocyte', 1200), ('Mast', 1064), ('Plasma', 1000), ('Erythrocyte', 288), ('Fibroblast', 200), ('Dendritic', 200)]
2020-11-03 15:55:36,854 training dataset shape: (5509, 17615)
2020-11-03 15:55:36,856 validation dataset shape: (6843, 17615)
2020-11-03 15:55:48,702 epoch: 1, train loss: 22.44988250732422, val loss: 61.126651763916016
2020-11-03 15:55:59,631 epoch: 2, train loss: 22.3106632232666, val loss: 60.916221618652344
2020-11-03 15:56:10,672 epoch: 3, train loss: 22.4382

snRNAseq brca_train_melanoma_val (2064, 19891) (6735, 23452)


2020-11-03 15:59:50,897 normalizing the expression counts for model training
2020-11-03 15:59:55,002 input dataset shape: (8799, 16275)
2020-11-03 15:59:55,004 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 15:59:55,006 possible cell types: [('B cell', 1200), ('Malignant', 1200), ('CD8 T cell', 1200), ('CD4 T cell', 1200), ('Monocyte', 1200), ('Treg', 862), ('Dendritic', 741), ('NK', 538), ('Fibroblast', 321), ('Endothelial', 200), ('Plasma', 73), ('Mast', 64)]
2020-11-03 15:59:56,847 training dataset shape: (4273, 16275)
2020-11-03 15:59:56,848 validation dataset shape: (4526, 16275)
2020-11-03 16:00:05,971 epoch: 1, train loss: 26.017932891845703, val loss: 67.54197692871094
2020-11-03 16:00:14,243 epoch: 2, train loss: 25.277915954589844, val loss: 66.95667266845703
2020-11-03 16:00:22,541 epoch: 3, train loss: 24.970932006835938, val loss: 66.69097900390625
202

snRNAseq brca_train_pbmc_val (2064, 19891) (1698, 32738)


2020-11-03 16:03:02,032 normalizing the expression counts for model training
2020-11-03 16:03:04,097 input dataset shape: (3762, 18919)
2020-11-03 16:03:04,099 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Treg']
2020-11-03 16:03:04,100 possible cell types: [('CD4 T cell', 1143), ('Monocyte', 634), ('B cell', 341), ('CD8 T cell', 306), ('NK', 263), ('Dendritic', 208), ('Endothelial', 200), ('Malignant', 200), ('Fibroblast', 200), ('Treg', 200), ('Mast', 64), ('Megakaryocyte', 3)]
2020-11-03 16:03:04,864 training dataset shape: (2454, 18919)
2020-11-03 16:03:04,865 validation dataset shape: (1308, 18919)




2020-11-03 16:03:10,535 5 out of the last 17 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 16:03:10,642 6 out of the last 18 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 16:03:11,635 epoch: 1, train loss: 42.160152435302734, val loss: 76.42351531982422
2020-11-03 16:03:17,219 epoch: 2, train loss: 41.45851135253906, val loss: 75.6661148071289
2020-11-03 16:03:22,719 epoch: 3, train loss: 40.74289321899414, val loss: 75.40392303466797
2020-11-03 16:03:28,274 epoch: 4, train loss: 40.5616455078125, val loss: 75.17089080810547
2020-11-03 16:03:33,870 epoch: 5, train loss: 40.452117919921875, val loss: 7

snRNAseq brca_train_pdac_val (2064, 19891) (15435, 28756)


2020-11-03 16:05:08,074 normalizing the expression counts for model training
2020-11-03 16:05:17,054 input dataset shape: (17499, 17904)
2020-11-03 16:05:17,056 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 16:05:17,060 possible cell types: [('B cell', 1200), ('Endothelial', 1200), ('Malignant', 1200), ('Fibroblast', 1200), ('NK', 1200), ('CD8 T cell', 1200), ('CD4 T cell', 1200), ('Treg', 1200), ('Monocyte', 1200), ('Dendritic', 1200), ('Mast', 1064), ('Plasma', 1000), ('Acinar', 1000), ('Islet', 1000), ('Epithelial', 968), ('Erythrocyte', 412), ('Tuft', 55)]
2020-11-03 16:05:22,165 training dataset shape: (7797, 17904)
2020-11-03 16:05:22,167 validation dataset shape: (9702, 17904)
2020-11-03 16:05:37,181 epoch: 1, train loss: 19.83945083618164, val loss: 63.55595397949219
2020-11-03 16:05:51,089 epoch: 2, 

snRNAseq gbm_train_brca_val (1316, 19891) (11253, 27131)


2020-11-03 16:10:53,864 normalizing the expression counts for model training
2020-11-03 16:11:00,057 input dataset shape: (12569, 17565)
2020-11-03 16:11:00,059 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 16:11:00,061 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('Endothelial', 1072), ('B cell', 1044), ('CD8 T cell', 1000), ('NK', 1000), ('Treg', 1000), ('CD4 T cell', 1000), ('Monocyte', 1000), ('Plasma', 1000), ('Mast', 546), ('Dendritic', 392), ('Erythrocyte', 315), ('Neuron', 200), ('Microglia', 200), ('T cells', 200), ('Oligodendrocytes', 200)]
2020-11-03 16:11:03,421 training dataset shape: (6371, 17565)
2020-11-03 16:11:03,422 validation dataset shape: (6198, 17565)
2020-11-03 16:11:17,222 epoch: 1, train loss: 22.93305015563965, val loss: 43.824562072753906
2020-11-03

snRNAseq gbm_train_cesc_val (1316, 19891) (8449, 22928)


2020-11-03 16:15:53,440 normalizing the expression counts for model training
2020-11-03 16:15:58,094 input dataset shape: (9765, 16509)
2020-11-03 16:15:58,096 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells']
2020-11-03 16:15:58,097 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('Monocyte', 1000), ('CD8 T cell', 1000), ('Epithelial', 1000), ('NK', 1000), ('Plasma', 935), ('CD4 T cell', 918), ('Endothelial', 604), ('Neuron', 200), ('Microglia', 200), ('T cells', 200), ('Oligodendrocytes', 200), ('Mast', 49), ('B cell', 44), ('Erythrocyte', 15)]
2020-11-03 16:16:00,704 training dataset shape: (4997, 16509)
2020-11-03 16:16:00,705 validation dataset shape: (4768, 16509)




2020-11-03 16:16:09,437 5 out of the last 84 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 16:16:10,131 5 out of the last 16 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 16:16:11,831 epoch: 1, train loss: 27.360685348510742, val loss: 38.5082893371582
2020-11-03 16:16:21,440 epoch: 2, train loss: 26.806385040283203, val loss: 38.307708740234375
2020-11-03 16:16:31,132 epoch: 3, train loss: 26.37519073486328, val loss: 38.32129669189453
2020-11-03 16:16:40,914 epoch: 4, train loss: 25.862516403198242, val loss: 38.1708984375
2020-11-03 16:16:50,585 epoch: 5, train loss: 25.735336303710938, val loss: 3

snRNAseq gbm_train_hnscc_val (1316, 19891) (10288, 26929)


2020-11-03 16:19:48,892 normalizing the expression counts for model training
2020-11-03 16:19:54,616 input dataset shape: (11604, 17615)
2020-11-03 16:19:54,617 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 16:19:54,620 possible cell types: [('Malignant', 1200), ('Endothelial', 1072), ('B cell', 1044), ('Treg', 1000), ('CD4 T cell', 1000), ('Plasma', 1000), ('Mast', 1000), ('CD8 T cell', 1000), ('Monocyte', 1000), ('NK', 1000), ('Erythrocyte', 288), ('Fibroblast', 200), ('Neuron', 200), ('Microglia', 200), ('T cells', 200), ('Oligodendrocytes', 200)]
2020-11-03 16:19:57,530 training dataset shape: (5955, 17615)
2020-11-03 16:19:57,531 validation dataset shape: (5649, 17615)
2020-11-03 16:20:10,442 epoch: 1, train loss: 23.7562198638916, val loss: 39.92554473876953
2020-11-03 16:20:22,549 epoch: 2, train loss:

snRNAseq gbm_train_melanoma_val (1316, 19891) (6735, 23452)


2020-11-03 16:24:33,489 normalizing the expression counts for model training
2020-11-03 16:24:37,169 input dataset shape: (8051, 16275)
2020-11-03 16:24:37,171 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 16:24:37,173 possible cell types: [('Malignant', 1200), ('B cell', 1044), ('CD8 T cell', 1000), ('Monocyte', 1000), ('CD4 T cell', 1000), ('Treg', 662), ('Dendritic', 541), ('NK', 338), ('Fibroblast', 321), ('Neuron', 200), ('Microglia', 200), ('T cells', 200), ('Oligodendrocytes', 200), ('Plasma', 73), ('Endothelial', 72)]
2020-11-03 16:24:38,923 training dataset shape: (4458, 16275)
2020-11-03 16:24:38,924 validation dataset shape: (3593, 16275)
2020-11-03 16:24:48,858 epoch: 1, train loss: 25.84004783630371, val loss: 41.271270751953125
2020-11-03 16:24:57,331 epoch: 2, train loss: 25.39883804321289, val loss: 41.

snRNAseq gbm_train_pbmc_val (1316, 19891) (1698, 32738)


2020-11-03 16:27:56,524 normalizing the expression counts for model training
2020-11-03 16:27:58,122 input dataset shape: (3014, 18919)
2020-11-03 16:27:58,123 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Megakaryocyte', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-03 16:27:58,124 possible cell types: [('CD4 T cell', 943), ('Monocyte', 434), ('Malignant', 200), ('Fibroblast', 200), ('Neuron', 200), ('Microglia', 200), ('T cells', 200), ('Oligodendrocytes', 200), ('B cell', 185), ('CD8 T cell', 106), ('Endothelial', 72), ('NK', 63), ('Dendritic', 8), ('Megakaryocyte', 3)]
2020-11-03 16:27:58,727 training dataset shape: (2022, 18919)
2020-11-03 16:27:58,728 validation dataset shape: (992, 18919)




2020-11-03 16:28:03,520 5 out of the last 40 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 16:28:03,609 6 out of the last 41 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 16:28:03,703 6 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 16:28:03,818 7 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 16:28:03,943 6 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 16:28:04,067 7 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 16:28:04,187 7 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 16:28:05,397 epoch: 1, train loss: 44.413169860839844, val loss: 51.10897445678711
2020-11-03 16:28:10,467 epoch: 2, train loss: 43.411705017089844, val loss: 50.264678955078125
2020-11-03 16:28:15,295 epoch: 3, train loss: 42.48542404174805, val loss: 50.13445281982422
2020-11-03 16:28:20,025 epoch: 4, train loss: 42.08583068847656, val loss: 50.00396728515625
2020-11-03 16:28:24,898 epoch: 5, train loss: 42.03630065917969, val loss

snRNAseq gbm_train_pdac_val (1316, 19891) (15435, 28756)


2020-11-03 16:29:45,347 normalizing the expression counts for model training
2020-11-03 16:29:53,959 input dataset shape: (16751, 17904)
2020-11-03 16:29:53,961 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg', 'Tuft']
2020-11-03 16:29:53,963 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('Endothelial', 1072), ('B cell', 1044), ('Monocyte', 1000), ('Plasma', 1000), ('NK', 1000), ('Dendritic', 1000), ('CD8 T cell', 1000), ('Treg', 1000), ('Acinar', 1000), ('Islet', 1000), ('CD4 T cell', 1000), ('Mast', 1000), ('Epithelial', 968), ('Erythrocyte', 412), ('Neuron', 200), ('Microglia', 200), ('T cells', 200), ('Oligodendrocytes', 200), ('Tuft', 55)]
2020-11-03 16:29:59,082 training dataset shape: (8421, 17904)
2020-11-03 16:29:59,083 validation dataset shape: (8330

snRNAseq brca_train_brca_val (2064, 19891) (9490, 29175)


2020-11-03 16:35:53,984 normalizing the expression counts for model training
2020-11-03 16:35:59,782 input dataset shape: (11554, 17494)
2020-11-03 16:35:59,784 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 16:35:59,787 possible cell types: [('B cell', 1200), ('Endothelial', 1200), ('Malignant', 1200), ('Fibroblast', 1200), ('CD8 T cell', 1200), ('CD4 T cell', 1200), ('Treg', 1200), ('Monocyte', 1200), ('Plasma', 1000), ('NK', 482), ('Dendritic', 217), ('Mast', 182), ('Adipocyte', 73)]
2020-11-03 16:36:02,585 training dataset shape: (5143, 17494)
2020-11-03 16:36:02,586 validation dataset shape: (6411, 17494)
2020-11-03 16:36:13,865 epoch: 1, train loss: 26.717819213867188, val loss: 64.1409912109375
2020-11-03 16:36:24,178 epoch: 2, train loss: 26.53854751586914, val loss: 63.79296875
2020-11-03 16:36:34,460 epoch: 3, train loss: 26.175870895385742, 

snRNAseq brca_train_ccrcc_val (2064, 19891) (8605, 33538)


2020-11-03 16:40:04,743 normalizing the expression counts for model training
2020-11-03 16:40:10,550 input dataset shape: (10669, 18895)
2020-11-03 16:40:10,552 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 16:40:10,554 possible cell types: [('Endothelial', 1200), ('Malignant', 1200), ('Fibroblast', 1200), ('NK', 1200), ('CD4 T cell', 1200), ('Monocyte', 1200), ('CD8 T cell', 1046), ('Epithelial', 1000), ('Treg', 574), ('Dendritic', 490), ('B cell', 200), ('Plasma', 95), ('Mast', 64)]
2020-11-03 16:40:13,274 training dataset shape: (4876, 18895)
2020-11-03 16:40:13,275 validation dataset shape: (5793, 18895)
2020-11-03 16:40:24,764 epoch: 1, train loss: 25.724599838256836, val loss: 75.06639099121094
2020-11-03 16:40:35,123 epoch: 2, train loss: 25.339773178100586, val loss: 73.91915130615234
2020-11-03 16:40:45,716 epoch: 3, train loss: 25.066381454

snRNAseq brca_train_gbm_val (2064, 19891) (6810, 29748)


2020-11-03 16:44:14,870 normalizing the expression counts for model training
2020-11-03 16:44:19,263 input dataset shape: (8874, 17613)
2020-11-03 16:44:19,265 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'T cells', 'Treg']
2020-11-03 16:44:19,266 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('Microglia', 1000), ('Neuron', 1000), ('Oligodendrocytes', 1000), ('T cells', 1000), ('Endothelial', 581), ('B cell', 578), ('Monocyte', 251), ('NK', 200), ('CD8 T cell', 200), ('CD4 T cell', 200), ('Treg', 200), ('Dendritic', 200), ('Mast', 64)]
2020-11-03 16:44:21,403 training dataset shape: (4615, 17613)
2020-11-03 16:44:21,404 validation dataset shape: (4259, 17613)




2020-11-03 16:44:29,801 5 out of the last 81 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 16:44:29,924 5 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 16:44:31,944 epoch: 1, train loss: 30.926380157470703, val loss: 70.54362487792969
2020-11-03 16:44:41,149 epoch: 2, train loss: 30.269176483154297, val loss: 69.90587615966797
2020-11-03 16:44:50,588 epoch: 3, train loss: 29.963459014892578, val loss: 69.78934478759766
2020-11-03 16:44:59,993 epoch: 4, train loss: 29.70996856689453, val loss: 69.43177795410156
2020-11-03 16:45:09,272 epoch: 5, train loss: 29.68734359741211, val loss

snRNAseq gbm_train_brca_val (1316, 19891) (9490, 29175)


2020-11-03 16:47:58,182 normalizing the expression counts for model training
2020-11-03 16:48:03,488 input dataset shape: (10806, 17494)
2020-11-03 16:48:03,490 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 16:48:03,492 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('Endothelial', 1072), ('B cell', 1044), ('CD4 T cell', 1000), ('Monocyte', 1000), ('CD8 T cell', 1000), ('Treg', 1000), ('Plasma', 1000), ('NK', 282), ('Neuron', 200), ('Microglia', 200), ('T cells', 200), ('Oligodendrocytes', 200), ('Mast', 118), ('Adipocyte', 73), ('Dendritic', 17)]
2020-11-03 16:48:06,423 training dataset shape: (5469, 17494)
2020-11-03 16:48:06,424 validation dataset shape: (5337, 17494)




2020-11-03 16:48:16,320 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 16:48:18,497 epoch: 1, train loss: 28.335813522338867, val loss: 39.663787841796875
2020-11-03 16:48:29,023 epoch: 2, train loss: 28.073688507080078, val loss: 39.51173782348633
2020-11-03 16:48:39,661 epoch: 3, train loss: 27.82869529724121, val loss: 39.36353302001953
2020-11-03 16:48:50,474 epoch: 4, train loss: 27.52652931213379, val loss: 39.44798278808594
2020-11-03 16:49:01,186 epoch: 5, train loss: 27.240171432495117, val los

snRNAseq gbm_train_ccrcc_val (1316, 19891) (8605, 33538)


2020-11-03 16:52:18,458 normalizing the expression counts for model training
2020-11-03 16:52:23,857 input dataset shape: (9921, 18895)
2020-11-03 16:52:23,859 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 16:52:23,861 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('Endothelial', 1072), ('Monocyte', 1000), ('NK', 1000), ('CD4 T cell', 1000), ('Epithelial', 1000), ('CD8 T cell', 846), ('Treg', 374), ('Dendritic', 290), ('Neuron', 200), ('Microglia', 200), ('T cells', 200), ('Oligodendrocytes', 200), ('Plasma', 95), ('B cell', 44)]
2020-11-03 16:52:26,373 training dataset shape: (5167, 18895)
2020-11-03 16:52:26,374 validation dataset shape: (4754, 18895)




2020-11-03 16:52:36,319 5 out of the last 86 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 16:52:38,618 epoch: 1, train loss: 26.56262969970703, val loss: 50.70489501953125
2020-11-03 16:52:49,401 epoch: 2, train loss: 26.28961944580078, val loss: 50.435646057128906
2020-11-03 16:53:00,408 epoch: 3, train loss: 25.77785301208496, val loss: 50.2794303894043
2020-11-03 16:53:11,451 epoch: 4, train loss: 25.470720291137695, val loss: 50.129302978515625
2020-11-03 16:53:22,585 epoch: 5, train loss: 25.32710075378418, val loss:

snRNAseq gbm_train_gbm_val (1316, 19891) (6810, 29748)


2020-11-03 16:56:37,669 normalizing the expression counts for model training
2020-11-03 16:56:41,782 input dataset shape: (8126, 17613)
2020-11-03 16:56:41,783 possible cell types: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-03 16:56:41,785 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('Neuron', 1200), ('Microglia', 1200), ('T cells', 1200), ('Oligodendrocytes', 1200), ('Endothelial', 453), ('B cell', 422), ('Monocyte', 51)]
2020-11-03 16:56:43,517 training dataset shape: (3567, 17613)
2020-11-03 16:56:43,519 validation dataset shape: (4559, 17613)
2020-11-03 16:56:51,654 epoch: 1, train loss: 31.452207565307617, val loss: 42.747493743896484
2020-11-03 16:56:58,967 epoch: 2, train loss: 30.36213493347168, val loss: 41.8216552734375
2020-11-03 16:57:06,214 epoch: 3, train loss: 30.054365158081055, val loss: 41.75782012939453
2020-11-03 16:57:13,407 epoch: 4, train loss: 29.663301467895

snRNAseq brca_train_brca_val (2455, 29175) (11253, 27131)


2020-11-03 16:59:34,446 normalizing the expression counts for model training
2020-11-03 16:59:44,463 input dataset shape: (13708, 25674)
2020-11-03 16:59:44,465 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 16:59:44,467 possible cell types: [('CD4 T cell', 1200), ('Treg', 1200), ('Endothelial', 1200), ('Malignant', 1200), ('NK', 1200), ('B cell', 1200), ('Monocyte', 1200), ('Fibroblast', 1200), ('CD8 T cell', 1200), ('Plasma', 1200), ('Mast', 746), ('Dendritic', 447), ('Erythrocyte', 315), ('Adipocyte', 200)]
2020-11-03 16:59:48,720 training dataset shape: (6131, 25674)
2020-11-03 16:59:48,721 validation dataset shape: (7577, 25674)
2020-11-03 17:00:05,205 epoch: 1, train loss: 31.34288787841797, val loss: 51.92230224609375
2020-11-03 17:00:20,683 epoch: 2, train loss: 30.811159133911133, val loss: 51.163177490234375
2020-11-03 17:00:36

snRNAseq brca_train_cesc_val (2455, 29175) (8449, 22928)


2020-11-03 17:05:50,595 normalizing the expression counts for model training
2020-11-03 17:05:57,481 input dataset shape: (10904, 22001)
2020-11-03 17:05:57,483 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 17:05:57,485 possible cell types: [('Malignant', 1200), ('NK', 1200), ('Monocyte', 1200), ('Fibroblast', 1200), ('CD8 T cell', 1200), ('Plasma', 1135), ('CD4 T cell', 1118), ('Epithelial', 1000), ('Endothelial', 732), ('Mast', 249), ('Treg', 200), ('Adipocyte', 200), ('B cell', 200), ('Dendritic', 55), ('Erythrocyte', 15)]
2020-11-03 17:06:00,597 training dataset shape: (5203, 22001)
2020-11-03 17:06:00,598 validation dataset shape: (5701, 22001)




2020-11-03 17:06:11,602 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 17:06:11,912 5 out of the last 15 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 17:06:13,741 epoch: 1, train loss: 33.40736770629883, val loss: 39.75957489013672
2020-11-03 17:06:25,790 epoch: 2, train loss: 32.67723846435547, val loss: 38.8954963684082
2020-11-03 17:06:37,854 epoch: 3, train loss: 32.04677200317383, val loss: 38.36486053466797
2020-11-03 17:06:49,913 epoch: 4, train loss: 31.65525245666504, val loss: 38.138641357421875
2020-11-03 17:07:02,097 epoch: 5, train loss: 31.384477615356445, val loss: 

snRNAseq brca_train_hnscc_val (2455, 29175) (10288, 26929)


2020-11-03 17:10:44,475 normalizing the expression counts for model training
2020-11-03 17:10:53,581 input dataset shape: (12743, 25299)
2020-11-03 17:10:53,583 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 17:10:53,585 possible cell types: [('CD4 T cell', 1200), ('Treg', 1200), ('Endothelial', 1200), ('Malignant', 1200), ('NK', 1200), ('B cell', 1200), ('Mast', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('Plasma', 1200), ('Erythrocyte', 288), ('Adipocyte', 200), ('Fibroblast', 200), ('Dendritic', 55)]
2020-11-03 17:10:57,945 training dataset shape: (5553, 25299)
2020-11-03 17:10:57,946 validation dataset shape: (7190, 25299)




2020-11-03 17:11:11,526 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 17:11:13,467 epoch: 1, train loss: 29.571866989135742, val loss: 43.53791809082031
2020-11-03 17:11:27,734 epoch: 2, train loss: 28.977537155151367, val loss: 42.6842041015625
2020-11-03 17:11:41,935 epoch: 3, train loss: 28.510984420776367, val loss: 42.15034484863281
2020-11-03 17:11:56,180 epoch: 4, train loss: 28.204927444458008, val loss: 41.826416015625
2020-11-03 17:12:10,595 epoch: 5, train loss: 27.96969223022461, val loss: 

snRNAseq brca_train_melanoma_val (2455, 29175) (6735, 23452)


2020-11-03 17:16:34,327 normalizing the expression counts for model training
2020-11-03 17:16:39,819 input dataset shape: (9190, 21018)
2020-11-03 17:16:39,821 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 17:16:39,823 possible cell types: [('CD4 T cell', 1200), ('Malignant', 1200), ('B cell', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('Treg', 862), ('Dendritic', 596), ('NK', 538), ('Fibroblast', 321), ('Plasma', 273), ('Endothelial', 200), ('Adipocyte', 200), ('Mast', 200)]
2020-11-03 17:16:42,127 training dataset shape: (4473, 21018)
2020-11-03 17:16:42,128 validation dataset shape: (4717, 21018)
2020-11-03 17:16:52,818 epoch: 1, train loss: 33.07181930541992, val loss: 53.37400817871094
2020-11-03 17:17:02,695 epoch: 2, train loss: 32.421756744384766, val loss: 52.26659393310547
2020-11-03 17:17:12,387 epoch: 3, train loss: 31.86518478393554

snRNAseq brca_train_pbmc_val (2455, 29175) (1698, 32738)


2020-11-03 17:20:23,531 normalizing the expression counts for model training
2020-11-03 17:20:25,745 input dataset shape: (4153, 18731)
2020-11-03 17:20:25,746 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 17:20:25,748 possible cell types: [('CD4 T cell', 1143), ('Monocyte', 634), ('B cell', 341), ('CD8 T cell', 306), ('NK', 263), ('Treg', 200), ('Endothelial', 200), ('Malignant', 200), ('Adipocyte', 200), ('Mast', 200), ('Fibroblast', 200), ('Plasma', 200), ('Dendritic', 63), ('Megakaryocyte', 3)]
2020-11-03 17:20:26,623 training dataset shape: (2748, 18731)
2020-11-03 17:20:26,624 validation dataset shape: (1405, 18731)




2020-11-03 17:20:32,839 5 out of the last 18 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 17:20:32,929 6 out of the last 19 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 17:20:33,036 6 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 17:20:34,233 epoch: 1, train loss: 36.99061584472656, val loss: 45.09469985961914
2020-11-03 17:20:40,520 epoch: 2, train loss: 35.850852966308594, val loss: 44.658424377441406
2020-11-03 17:20:46,802 epoch: 3, train loss: 35.36565399169922, val loss: 44.37563705444336
2020-11-03 17:20:53,023 epoch: 4, train loss: 34.936363220214844, val loss: 43.845977783203125
2020-11-03 17:20:59,289 epoch: 5, train loss: 34.70332717895508, val los

snRNAseq brca_train_pdac_val (2455, 29175) (15435, 28756)


2020-11-03 17:22:45,625 normalizing the expression counts for model training
2020-11-03 17:22:59,049 input dataset shape: (17890, 26783)
2020-11-03 17:22:59,051 possible cell types: ['Acinar', 'Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 17:22:59,054 possible cell types: [('CD4 T cell', 1200), ('Treg', 1200), ('Endothelial', 1200), ('Malignant', 1200), ('NK', 1200), ('B cell', 1200), ('Mast', 1200), ('Monocyte', 1200), ('Fibroblast', 1200), ('CD8 T cell', 1200), ('Plasma', 1200), ('Dendritic', 1055), ('Acinar', 1000), ('Islet', 1000), ('Epithelial', 968), ('Erythrocyte', 412), ('Adipocyte', 200), ('Tuft', 55)]
2020-11-03 17:23:05,362 training dataset shape: (7956, 26783)
2020-11-03 17:23:05,363 validation dataset shape: (9934, 26783)
2020-11-03 17:23:27,600 epoch: 1, train loss: 27.639474868774414, val loss: 44.517738342285156

snRNAseq ccrcc_train_brca_val (2113, 33538) (11253, 27131)


2020-11-03 17:31:13,985 normalizing the expression counts for model training
2020-11-03 17:31:24,291 input dataset shape: (13366, 27131)
2020-11-03 17:31:24,293 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 17:31:24,295 possible cell types: [('Endothelial', 1200), ('Malignant', 1200), ('NK', 1200), ('Fibroblast', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('Treg', 1200), ('CD4 T cell', 1200), ('Plasma', 1113), ('B cell', 1000), ('Dendritic', 592), ('Mast', 546), ('Erythrocyte', 315), ('Epithelial', 200)]
2020-11-03 17:31:28,476 training dataset shape: (5974, 27131)
2020-11-03 17:31:28,477 validation dataset shape: (7392, 27131)
2020-11-03 17:31:45,502 epoch: 1, train loss: 30.73352813720703, val loss: 56.238380432128906
2020-11-03 17:32:01,855 epoch: 2, train loss: 30.271930694580078, val loss: 55.377994537353516
2020-11-03 17:32

snRNAseq ccrcc_train_cesc_val (2113, 33538) (8449, 22928)


2020-11-03 17:37:44,343 normalizing the expression counts for model training
2020-11-03 17:37:51,242 input dataset shape: (10562, 22919)
2020-11-03 17:37:51,244 possible cell types: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 17:37:51,245 possible cell types: [('Malignant', 1200), ('NK', 1200), ('Fibroblast', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('Epithelial', 1200), ('CD4 T cell', 1118), ('Plasma', 1048), ('Endothelial', 732), ('Dendritic', 200), ('Treg', 200), ('Mast', 49), ('Erythrocyte', 15)]
2020-11-03 17:37:54,432 training dataset shape: (4857, 22919)
2020-11-03 17:37:54,433 validation dataset shape: (5705, 22919)




2020-11-03 17:38:06,066 5 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 17:38:07,378 epoch: 1, train loss: 33.077545166015625, val loss: 41.215389251708984
2020-11-03 17:38:19,230 epoch: 2, train loss: 32.10014724731445, val loss: 39.9678955078125
2020-11-03 17:38:31,256 epoch: 3, train loss: 31.500370025634766, val loss: 39.08140563964844
2020-11-03 17:38:43,026 epoch: 4, train loss: 31.044784545898438, val loss: 38.54410171508789
2020-11-03 17:38:54,903 epoch: 5, train loss: 30.765178680419922, val los

snRNAseq ccrcc_train_hnscc_val (2113, 33538) (10288, 26929)


2020-11-03 17:42:30,058 normalizing the expression counts for model training
2020-11-03 17:42:39,528 input dataset shape: (12401, 26918)
2020-11-03 17:42:39,530 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 17:42:39,532 possible cell types: [('Endothelial', 1200), ('Malignant', 1200), ('NK', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('Treg', 1200), ('CD4 T cell', 1200), ('Plasma', 1113), ('B cell', 1000), ('Mast', 1000), ('Erythrocyte', 288), ('Fibroblast', 200), ('Dendritic', 200), ('Epithelial', 200)]
2020-11-03 17:42:43,536 training dataset shape: (5655, 26918)
2020-11-03 17:42:43,537 validation dataset shape: (6746, 26918)
2020-11-03 17:42:59,478 epoch: 1, train loss: 28.516225814819336, val loss: 46.44199752807617
2020-11-03 17:43:14,275 epoch: 2, train loss: 27.908416748046875, val loss: 45.607303619384766
2020-11-03 17:43

snRNAseq ccrcc_train_melanoma_val (2113, 33538) (6735, 23452)


2020-11-03 17:48:34,276 normalizing the expression counts for model training
2020-11-03 17:48:39,883 input dataset shape: (8848, 21975)
2020-11-03 17:48:39,884 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 17:48:39,886 possible cell types: [('Malignant', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('CD4 T cell', 1200), ('B cell', 1000), ('Treg', 862), ('Dendritic', 741), ('NK', 538), ('Fibroblast', 321), ('Endothelial', 200), ('Epithelial', 200), ('Plasma', 186)]
2020-11-03 17:48:42,023 training dataset shape: (4462, 21975)
2020-11-03 17:48:42,024 validation dataset shape: (4386, 21975)
2020-11-03 17:48:53,259 epoch: 1, train loss: 31.64928436279297, val loss: 49.50714874267578
2020-11-03 17:49:03,561 epoch: 2, train loss: 31.106340408325195, val loss: 48.60068893432617
2020-11-03 17:49:13,934 epoch: 3, train loss: 30.543949127197266, val loss: 47.93339

snRNAseq ccrcc_train_pbmc_val (2113, 33538) (1698, 32738)


2020-11-03 17:52:32,851 normalizing the expression counts for model training
2020-11-03 17:52:35,045 input dataset shape: (3811, 20453)
2020-11-03 17:52:35,047 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 17:52:35,048 possible cell types: [('CD4 T cell', 1143), ('Monocyte', 634), ('CD8 T cell', 306), ('NK', 263), ('Dendritic', 208), ('Endothelial', 200), ('Malignant', 200), ('Fibroblast', 200), ('Treg', 200), ('Epithelial', 200), ('B cell', 141), ('Plasma', 113), ('Megakaryocyte', 3)]
2020-11-03 17:52:35,884 training dataset shape: (2512, 20453)
2020-11-03 17:52:35,885 validation dataset shape: (1299, 20453)




2020-11-03 17:52:42,120 5 out of the last 15 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 17:52:42,208 6 out of the last 16 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 17:52:42,316 7 out of the last 17 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 17:52:42,550 5 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 17:52:43,456 epoch: 1, train loss: 36.233917236328125, val loss: 41.19053268432617
2020-11-03 17:52:49,411 epoch: 2, train loss: 34.893577575683594, val loss: 40.72213363647461
2020-11-03 17:52:55,599 epoch: 3, train loss: 34.17430877685547, val loss: 40.23706817626953
2020-11-03 17:53:01,685 epoch: 4, train loss: 33.81786346435547, val loss: 39.68116760253906
2020-11-03 17:53:07,675 epoch: 5, train loss: 33.135719299316406, val loss

snRNAseq ccrcc_train_pdac_val (2113, 33538) (15435, 28756)


2020-11-03 17:54:49,908 normalizing the expression counts for model training
2020-11-03 17:55:04,414 input dataset shape: (17548, 28756)
2020-11-03 17:55:04,417 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-03 17:55:04,420 possible cell types: [('Endothelial', 1200), ('Malignant', 1200), ('NK', 1200), ('Fibroblast', 1200), ('Monocyte', 1200), ('Dendritic', 1200), ('CD8 T cell', 1200), ('Treg', 1200), ('CD4 T cell', 1200), ('Epithelial', 1168), ('Plasma', 1113), ('B cell', 1000), ('Acinar', 1000), ('Islet', 1000), ('Mast', 1000), ('Erythrocyte', 412), ('Tuft', 55)]
2020-11-03 17:55:10,619 training dataset shape: (7804, 28756)
2020-11-03 17:55:10,620 validation dataset shape: (9744, 28756)
2020-11-03 17:55:31,668 epoch: 1, train loss: 26.78998374938965, val loss: 47.899208068847656
2020-11-03 17:55:51,762 epoch: 2

snRNAseq gbm_train_brca_val (1689, 29748) (11253, 27131)


2020-11-03 18:03:01,412 normalizing the expression counts for model training
2020-11-03 18:03:10,799 input dataset shape: (12942, 25705)
2020-11-03 18:03:10,801 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 18:03:10,803 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('B cell', 1200), ('Endothelial', 1200), ('Monocyte', 1089), ('CD8 T cell', 1000), ('NK', 1000), ('Treg', 1000), ('CD4 T cell', 1000), ('Plasma', 1000), ('Mast', 546), ('Dendritic', 392), ('Erythrocyte', 315), ('Neuron', 200), ('Oligodendrocytes', 200), ('T cells', 200), ('Microglia', 200)]
2020-11-03 18:03:14,946 training dataset shape: (6388, 25705)
2020-11-03 18:03:14,947 validation dataset shape: (6554, 25705)
2020-11-03 18:03:33,117 epoch: 1, train loss: 33.125064849853516, val loss: 60.002140045166016
2020-11-0

snRNAseq gbm_train_cesc_val (1689, 29748) (8449, 22928)


2020-11-03 18:09:39,674 normalizing the expression counts for model training
2020-11-03 18:09:46,003 input dataset shape: (10138, 21981)
2020-11-03 18:09:46,005 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells']
2020-11-03 18:09:46,007 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('Monocyte', 1089), ('CD8 T cell', 1000), ('Epithelial', 1000), ('NK', 1000), ('Plasma', 935), ('CD4 T cell', 918), ('Endothelial', 732), ('Neuron', 200), ('B cell', 200), ('Oligodendrocytes', 200), ('T cells', 200), ('Microglia', 200), ('Mast', 49), ('Erythrocyte', 15)]
2020-11-03 18:09:49,085 training dataset shape: (5319, 21981)
2020-11-03 18:09:49,086 validation dataset shape: (4819, 21981)




2020-11-03 18:10:00,920 5 out of the last 17 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 18:10:01,050 6 out of the last 19 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 18:10:01,316 5 out of the last 15 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 18:10:02,762 epoch: 1, train loss: 34.6427001953125, val loss: 47.716331481933594
2020-11-03 18:10:15,148 epoch: 2, train loss: 33.451114654541016, val loss: 46.559818267822266
2020-11-03 18:10:27,672 epoch: 3, train loss: 32.84257507324219, val loss: 45.953163146972656
2020-11-03 18:10:40,163 epoch: 4, train loss: 32.46458053588867, val loss: 45.58187484741211
2020-11-03 18:10:52,762 epoch: 5, train loss: 32.15856170654297, val loss

snRNAseq gbm_train_hnscc_val (1689, 29748) (10288, 26929)


2020-11-03 18:14:37,810 normalizing the expression counts for model training
2020-11-03 18:14:46,392 input dataset shape: (11977, 25389)
2020-11-03 18:14:46,394 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 18:14:46,396 possible cell types: [('Malignant', 1200), ('B cell', 1200), ('Endothelial', 1200), ('Monocyte', 1089), ('Treg', 1000), ('CD4 T cell', 1000), ('Plasma', 1000), ('Mast', 1000), ('CD8 T cell', 1000), ('NK', 1000), ('Erythrocyte', 288), ('Fibroblast', 200), ('Neuron', 200), ('Oligodendrocytes', 200), ('T cells', 200), ('Microglia', 200)]
2020-11-03 18:14:50,054 training dataset shape: (5962, 25389)
2020-11-03 18:14:50,055 validation dataset shape: (6015, 25389)
2020-11-03 18:15:06,667 epoch: 1, train loss: 30.36560821533203, val loss: 52.632293701171875
2020-11-03 18:15:22,561 epoch: 2, train los

snRNAseq gbm_train_melanoma_val (1689, 29748) (6735, 23452)


2020-11-03 18:20:48,727 normalizing the expression counts for model training
2020-11-03 18:20:53,811 input dataset shape: (8424, 21069)
2020-11-03 18:20:53,813 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 18:20:53,815 possible cell types: [('Malignant', 1200), ('B cell', 1200), ('Monocyte', 1089), ('CD8 T cell', 1000), ('CD4 T cell', 1000), ('Treg', 662), ('Dendritic', 541), ('NK', 338), ('Fibroblast', 321), ('Neuron', 200), ('Oligodendrocytes', 200), ('Endothelial', 200), ('T cells', 200), ('Microglia', 200), ('Plasma', 73)]
2020-11-03 18:20:55,944 training dataset shape: (4577, 21069)
2020-11-03 18:20:55,945 validation dataset shape: (3847, 21069)
2020-11-03 18:21:07,765 epoch: 1, train loss: 34.257381439208984, val loss: 65.53384399414062
2020-11-03 18:21:18,507 epoch: 2, train loss: 33.492042541503906, val loss: 6

snRNAseq gbm_train_pbmc_val (1689, 29748) (1698, 32738)


2020-11-03 18:24:54,855 normalizing the expression counts for model training
2020-11-03 18:24:56,713 input dataset shape: (3387, 18949)
2020-11-03 18:24:56,714 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Megakaryocyte', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-03 18:24:56,715 possible cell types: [('CD4 T cell', 943), ('Monocyte', 523), ('B cell', 341), ('Malignant', 200), ('Fibroblast', 200), ('Neuron', 200), ('Oligodendrocytes', 200), ('Endothelial', 200), ('T cells', 200), ('Microglia', 200), ('CD8 T cell', 106), ('NK', 63), ('Dendritic', 8), ('Megakaryocyte', 3)]
2020-11-03 18:24:57,476 training dataset shape: (2222, 18949)
2020-11-03 18:24:57,477 validation dataset shape: (1165, 18949)




2020-11-03 18:25:02,397 5 out of the last 44 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 18:25:02,488 5 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 18:25:02,610 6 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 18:25:02,797 6 out of the last 14 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 18:25:02,918 6 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 18:25:04,265 epoch: 1, train loss: 40.853057861328125, val loss: 59.82722473144531
2020-11-03 18:25:09,600 epoch: 2, train loss: 39.68557357788086, val loss: 58.57097625732422
2020-11-03 18:25:14,970 epoch: 3, train loss: 38.71845245361328, val loss: 58.2583122253418
2020-11-03 18:25:20,323 epoch: 4, train loss: 38.14030456542969, val loss: 57.52939224243164
2020-11-03 18:25:25,717 epoch: 5, train loss: 37.666927337646484, val loss: 

snRNAseq gbm_train_pdac_val (1689, 29748) (15435, 28756)


2020-11-03 18:26:56,757 normalizing the expression counts for model training
2020-11-03 18:27:10,003 input dataset shape: (17124, 27015)
2020-11-03 18:27:10,005 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg', 'Tuft']
2020-11-03 18:27:10,009 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('B cell', 1200), ('Endothelial', 1200), ('Monocyte', 1089), ('Plasma', 1000), ('NK', 1000), ('Dendritic', 1000), ('CD8 T cell', 1000), ('Treg', 1000), ('Acinar', 1000), ('Islet', 1000), ('CD4 T cell', 1000), ('Mast', 1000), ('Epithelial', 968), ('Erythrocyte', 412), ('Neuron', 200), ('Oligodendrocytes', 200), ('T cells', 200), ('Microglia', 200), ('Tuft', 55)]
2020-11-03 18:27:16,108 training dataset shape: (8413, 27015)
2020-11-03 18:27:16,109 validation dataset shape: (8711



2020-11-03 18:27:37,586 5 out of the last 17 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 18:27:39,589 epoch: 1, train loss: 28.684770584106445, val loss: 51.10179901123047
2020-11-03 18:28:02,191 epoch: 2, train loss: 27.639467239379883, val loss: 50.546424865722656
2020-11-03 18:28:24,737 epoch: 3, train loss: 27.10993766784668, val loss: 50.12910079956055
2020-11-03 18:28:47,245 epoch: 4, train loss: 26.857511520385742, val loss: 49.835723876953125
2020-11-03 18:29:10,242 epoch: 5, train loss: 26.68341064453125, val lo

snRNAseq brca_train_brca_val (2455, 29175) (9028, 19891)


2020-11-03 18:36:03,936 normalizing the expression counts for model training
2020-11-03 18:36:09,562 input dataset shape: (11483, 17494)
2020-11-03 18:36:09,564 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 18:36:09,566 possible cell types: [('CD4 T cell', 1200), ('Treg', 1200), ('Endothelial', 1200), ('Malignant', 1200), ('B cell', 1200), ('Monocyte', 1200), ('Fibroblast', 1200), ('CD8 T cell', 1200), ('Dendritic', 831), ('NK', 430), ('Mast', 222), ('Adipocyte', 200), ('Plasma', 200)]
2020-11-03 18:36:12,373 training dataset shape: (5225, 17494)
2020-11-03 18:36:12,374 validation dataset shape: (6258, 17494)
2020-11-03 18:36:23,657 epoch: 1, train loss: 31.34042739868164, val loss: 17.579299926757812
2020-11-03 18:36:34,097 epoch: 2, train loss: 30.843128204345703, val loss: 15.822182655334473
2020-11-03 18:36:44,766 epoch: 3, train loss: 30.54662132

snRNAseq brca_train_gbm_val (2455, 29175) (5650, 19891)


2020-11-03 18:40:28,964 normalizing the expression counts for model training
2020-11-03 18:40:32,988 input dataset shape: (8105, 17494)
2020-11-03 18:40:32,990 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 18:40:32,992 possible cell types: [('Malignant', 1200), ('Microglia', 1000), ('T cells', 1000), ('Oligodendrocytes', 1000), ('Neuron', 1000), ('Fibroblast', 810), ('Endothelial', 229), ('B cell', 211), ('CD4 T cell', 200), ('Treg', 200), ('NK', 200), ('Adipocyte', 200), ('Mast', 200), ('Monocyte', 200), ('CD8 T cell', 200), ('Plasma', 200), ('Dendritic', 55)]
2020-11-03 18:40:34,908 training dataset shape: (4600, 17494)
2020-11-03 18:40:34,909 validation dataset shape: (3505, 17494)




2020-11-03 18:40:43,176 5 out of the last 80 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 18:40:43,324 5 out of the last 12 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 18:40:43,416 6 out of the last 13 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 18:40:45,574 epoch: 1, train loss: 31.27610206604004, val loss: 19.71472930908203
2020-11-03 18:40:54,709 epoch: 2, train loss: 30.395530700683594, val loss: 17.537490844726562
2020-11-03 18:41:04,081 epoch: 3, train loss: 30.169532775878906, val loss: 17.44031524658203
2020-11-03 18:41:13,334 epoch: 4, train loss: 29.893611907958984, val loss: 17.27083396911621
2020-11-03 18:41:22,585 epoch: 5, train loss: 29.73757553100586, val los

snRNAseq ccrcc_train_brca_val (2113, 33538) (9028, 19891)


2020-11-03 18:44:13,830 normalizing the expression counts for model training
2020-11-03 18:44:19,865 input dataset shape: (11141, 18895)
2020-11-03 18:44:19,867 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-03 18:44:19,869 possible cell types: [('Endothelial', 1200), ('Malignant', 1200), ('Fibroblast', 1200), ('Monocyte', 1200), ('CD8 T cell', 1200), ('Treg', 1200), ('CD4 T cell', 1200), ('B cell', 1000), ('Dendritic', 976), ('NK', 430), ('Epithelial', 200), ('Plasma', 113), ('Mast', 22)]
2020-11-03 18:44:22,695 training dataset shape: (5020, 18895)
2020-11-03 18:44:22,696 validation dataset shape: (6121, 18895)
2020-11-03 18:44:34,173 epoch: 1, train loss: 32.71387481689453, val loss: 18.203872680664062
2020-11-03 18:44:44,751 epoch: 2, train loss: 32.206302642822266, val loss: 16.2027645111084
2020-11-03 18:44:55,216 epoch: 3, train loss: 31.917924880

snRNAseq ccrcc_train_gbm_val (2113, 33538) (5650, 19891)


2020-11-03 18:48:45,073 normalizing the expression counts for model training
2020-11-03 18:48:49,281 input dataset shape: (7763, 18895)
2020-11-03 18:48:49,283 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-03 18:48:49,285 possible cell types: [('Malignant', 1200), ('Microglia', 1000), ('T cells', 1000), ('Oligodendrocytes', 1000), ('Neuron', 1000), ('Fibroblast', 810), ('Endothelial', 229), ('NK', 200), ('Monocyte', 200), ('Dendritic', 200), ('CD8 T cell', 200), ('Treg', 200), ('Epithelial', 200), ('CD4 T cell', 200), ('Plasma', 113), ('B cell', 11)]
2020-11-03 18:48:51,177 training dataset shape: (4345, 18895)
2020-11-03 18:48:51,178 validation dataset shape: (3418, 18895)




2020-11-03 18:48:59,481 5 out of the last 73 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 18:48:59,582 6 out of the last 74 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 18:48:59,706 7 out of the last 76 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-03 18:48:59,928 5 out of the last 11 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 18:49:01,802 epoch: 1, train loss: 32.13343048095703, val loss: 18.66246223449707
2020-11-03 18:49:10,977 epoch: 2, train loss: 31.15593719482422, val loss: 16.567039489746094
2020-11-03 18:49:20,199 epoch: 3, train loss: 30.857988357543945, val loss: 16.439929962158203
2020-11-03 18:49:29,495 epoch: 4, train loss: 30.632963180541992, val loss: 16.191957473754883
2020-11-03 18:49:38,597 epoch: 5, train loss: 30.452970504760742, val l

snRNAseq gbm_train_brca_val (1689, 29748) (9028, 19891)


2020-11-03 18:52:31,697 normalizing the expression counts for model training
2020-11-03 18:52:37,004 input dataset shape: (10717, 17613)
2020-11-03 18:52:37,005 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'T cells', 'Treg']
2020-11-03 18:52:37,007 possible cell types: [('Malignant', 1200), ('Fibroblast', 1200), ('B cell', 1200), ('Endothelial', 1200), ('Monocyte', 1089), ('Treg', 1000), ('CD8 T cell', 1000), ('CD4 T cell', 1000), ('Dendritic', 776), ('NK', 230), ('Neuron', 200), ('Oligodendrocytes', 200), ('T cells', 200), ('Microglia', 200), ('Mast', 22)]
2020-11-03 18:52:39,555 training dataset shape: (5304, 17613)
2020-11-03 18:52:39,556 validation dataset shape: (5413, 17613)




2020-11-03 18:52:49,934 5 out of the last 24 calls to <function compute_loss at 0x7f13991523b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
2020-11-03 18:52:51,311 epoch: 1, train loss: 31.3986873626709, val loss: 14.94650650024414
2020-11-03 18:53:01,890 epoch: 2, train loss: 31.186670303344727, val loss: 14.675933837890625
2020-11-03 18:53:12,297 epoch: 3, train loss: 30.71109962463379, val loss: 13.698417663574219
2020-11-03 18:53:22,542 epoch: 4, train loss: 30.50670051574707, val loss: 13.55552864074707
2020-11-03 18:53:33,045 epoch: 5, train loss: 30.19209098815918, val loss:

snRNAseq gbm_train_gbm_val (1689, 29748) (5650, 19891)


2020-11-03 18:57:00,852 normalizing the expression counts for model training
2020-11-03 18:57:04,462 input dataset shape: (7339, 17613)
2020-11-03 18:57:04,463 possible cell types: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-03 18:57:04,465 possible cell types: [('Malignant', 1200), ('Neuron', 1200), ('Oligodendrocytes', 1200), ('T cells', 1200), ('Microglia', 1200), ('Fibroblast', 810), ('Endothelial', 229), ('B cell', 211), ('Monocyte', 89)]
2020-11-03 18:57:06,017 training dataset shape: (3399, 17613)
2020-11-03 18:57:06,018 validation dataset shape: (3940, 17613)
2020-11-03 18:57:13,932 epoch: 1, train loss: 34.261287689208984, val loss: 16.715831756591797
2020-11-03 18:57:20,899 epoch: 2, train loss: 33.686092376708984, val loss: 16.369407653808594
2020-11-03 18:57:27,925 epoch: 3, train loss: 32.901466369628906, val loss: 15.76179027557373
2020-11-03 18:57:34,953 epoch: 4, train loss: 32.5741539001

###### testing stuff

In [None]:
train, val = sc.read_h5ad(adata_map['snRNAseq']['brca']['train']), sc.read_h5ad(adata_map['scRNAseq']['brca']['val'])

In [None]:
module_dir = os.path.join(SANDBOX_DIR, 'temp_module')

In [None]:
train.obs['is_validation'] = [False] * train.shape[0]
val.obs['is_validation'] = [True] * val.shape[0]
combined = train.concatenate(val)
combined

In [None]:
# pds = PollockDataset(train, cell_type_key=CELL_TYPE_KEY,
#                      dataset_type='training')

In [None]:
val.shape

In [None]:
pds = PollockDataset(combined, cell_type_key=CELL_TYPE_KEY,
                     dataset_type='training', validation_key='is_validation')

In [None]:
pm = PollockModel(pds.cell_types, pds.train_adata.shape[1], alpha=.0001, latent_dim=25)

In [None]:
pm.fit(pds, epochs=2)

In [None]:
pm.save(pds, module_dir)

In [None]:
val.shape

In [None]:
preds = predict_from_anndata(val.copy(),
        '/home/estorrs/pollock/benchmarking/sandbox/temp_module', adata_batch_size=10000)
preds

In [None]:
df = pd.DataFrame.from_dict({
    'cell_id': preds.index.to_list(),
    'groundtruth': val.obs.loc[preds.index][CELL_TYPE_KEY].to_list(),
    'predicted': preds['predicted_cell_type'],
    'probability': preds['cell_type_probability']
})
df

##### scanpy ingest

In [95]:
def ingest_preprocess(adata):
    adata.var['mt'] = adata.var_names.str.startswith('MT-')
    sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, flavor="seurat", n_top_genes=2500)
    adata.raw = adata
    adata = adata[:, adata.var.highly_variable]
    sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt'])
    sc.pp.scale(adata)
    
    return adata

def run_scanpy_workflow(train, val, cell_type_key):
    var_names = train.var_names.intersection(val.var_names)
    train = train[:, var_names]
    val = val[:, var_names]
    
    groundtruth = val.obs[cell_type_key].to_list()

    sc.pp.pca(train)
    sc.pp.neighbors(train)
    sc.tl.umap(train)
    
    sc.tl.ingest(val, train, obs=cell_type_key)
    
    df = pd.DataFrame.from_dict({
        'cell_id': val.obs.index.to_list(),
        'groundtruth': groundtruth,
        'predicted': val.obs[cell_type_key].to_list(),
        'probability': [np.nan] * val.shape[0]
    })
    
    return df

In [22]:
run_workflow_for_datasets(adata_map, run_scanpy_workflow, 'scanpy_ingest', RESULTS_DIR)

scRNAseq brca (2600, 27131) (11253, 27131)


  if not is_categorical(df_full[k]):


scRNAseq cesc (1941, 22928) (8449, 22928)


  if not is_categorical(df_full[k]):


scRNAseq hnscc (2200, 26929) (10288, 26929)


  if not is_categorical(df_full[k]):


scRNAseq melanoma (2000, 23452) (6735, 23452)


  if not is_categorical(df_full[k]):


scRNAseq pbmc (940, 32738) (1698, 32738)


  if not is_categorical(df_full[k]):


scRNAseq pdac (3296, 28756) (15435, 28756)


  if not is_categorical(df_full[k]):


snATACseq brca (2064, 19891) (9028, 19891)


  if not is_categorical(df_full[k]):


snATACseq gbm (1316, 19891) (5650, 19891)


  if not is_categorical(df_full[k]):


snRNAseq brca (2455, 29175) (9490, 29175)


  if not is_categorical(df_full[k]):


snRNAseq ccrcc (2113, 33538) (8605, 33538)


  if not is_categorical(df_full[k]):


snRNAseq gbm (1689, 29748) (6810, 29748)


  if not is_categorical(df_full[k]):


In [96]:
run_workflow_for_cross_disease(adata_map, run_scanpy_workflow, 'scanpy_ingest', RESULTS_CROSS_DISEASE_DIR)

scRNAseq brca_train_cesc_val (2600, 27131) (8449, 22928)
scRNAseq brca_train_hnscc_val (2600, 27131) (10288, 26929)
scRNAseq brca_train_melanoma_val (2600, 27131) (6735, 23452)
scRNAseq brca_train_pbmc_val (2600, 27131) (1698, 32738)
scRNAseq brca_train_pdac_val (2600, 27131) (15435, 28756)
scRNAseq cesc_train_brca_val (1941, 22928) (11253, 27131)
scRNAseq cesc_train_hnscc_val (1941, 22928) (10288, 26929)
scRNAseq cesc_train_melanoma_val (1941, 22928) (6735, 23452)
scRNAseq cesc_train_pbmc_val (1941, 22928) (1698, 32738)
scRNAseq cesc_train_pdac_val (1941, 22928) (15435, 28756)
scRNAseq hnscc_train_brca_val (2200, 26929) (11253, 27131)
scRNAseq hnscc_train_cesc_val (2200, 26929) (8449, 22928)
scRNAseq hnscc_train_melanoma_val (2200, 26929) (6735, 23452)
scRNAseq hnscc_train_pbmc_val (2200, 26929) (1698, 32738)
scRNAseq hnscc_train_pdac_val (2200, 26929) (15435, 28756)
scRNAseq melanoma_train_brca_val (2000, 23452) (11253, 27131)
scRNAseq melanoma_train_cesc_val (2000, 23452) (8449, 229

In [97]:
run_workflow_for_cross_datatype(adata_map, run_scanpy_workflow, 'scanpy_ingest', RESULTS_CROSS_DTYPE_DIR)

snRNAseq brca_train_brca_val (2600, 27131) (9028, 19891)
snRNAseq brca_train_gbm_val (2600, 27131) (5650, 19891)
snRNAseq cesc_train_brca_val (1941, 22928) (9028, 19891)
snRNAseq cesc_train_gbm_val (1941, 22928) (5650, 19891)
snRNAseq hnscc_train_brca_val (2200, 26929) (9028, 19891)
snRNAseq hnscc_train_gbm_val (2200, 26929) (5650, 19891)
snRNAseq melanoma_train_brca_val (2000, 23452) (9028, 19891)
snRNAseq melanoma_train_gbm_val (2000, 23452) (5650, 19891)
snRNAseq pbmc_train_brca_val (940, 32738) (9028, 19891)
snRNAseq pbmc_train_gbm_val (940, 32738) (5650, 19891)
snRNAseq pdac_train_brca_val (3296, 28756) (9028, 19891)
snRNAseq pdac_train_gbm_val (3296, 28756) (5650, 19891)
snRNAseq brca_train_brca_val (2600, 27131) (9490, 29175)
snRNAseq brca_train_ccrcc_val (2600, 27131) (8605, 33538)
snRNAseq brca_train_gbm_val (2600, 27131) (6810, 29748)
snRNAseq cesc_train_brca_val (1941, 22928) (9490, 29175)
snRNAseq cesc_train_ccrcc_val (1941, 22928) (8605, 33538)
snRNAseq cesc_train_gbm_val 

###### testing stuff

In [None]:
train, val = adata_map['scRNAseq']['pbmc']['train'].copy(), adata_map['scRNAseq']['pbmc']['val'].copy()

In [None]:
train, val = ingest_preprocess(train), ingest_preprocess(val)

var_names = train.var_names.intersection(val.var_names)
train = train[:, var_names]
val = val[:, var_names]

sc.pp.pca(train)
sc.pp.neighbors(train)
sc.tl.umap(train)

In [None]:
sc.pl.umap(train, color='cell_type')

In [None]:
sc.tl.ingest(val, train, obs=CELL_TYPE_KEY)
val.uns[f'{CELL_TYPE_KEY}_colors'] = train.uns[f'{CELL_TYPE_KEY}_colors']

In [None]:
sc.pl.umap(val, color=[CELL_TYPE_KEY], wspace=0.5)


In [None]:
val

In [None]:
val.obs

##### ACTINN

In [9]:
def run_actinn_workflow(train, val, cell_type_key):
    X = train.X.toarray() if 'sparse' in str(type(train.X)) else train.X
    train_counts_df = pd.DataFrame(data=X.transpose(), index=train.var.index.to_list(),
                        columns=train.obs.index.to_list())
    X = val.X.toarray() if 'sparse' in str(type(val.X)) else val.X
    val_counts_df = pd.DataFrame(data=X.transpose(), index=val.var.index.to_list(),
                        columns=val.obs.index.to_list())
    
    train_counts_fp = os.path.join(SANDBOX_DIR, 'train_counts.txt')
    val_counts_fp = os.path.join(SANDBOX_DIR, 'val_counts.txt')
    train_counts_df.to_csv(train_counts_fp, sep='\t')
    val_counts_df.to_csv(val_counts_fp, sep='\t')
    
    train_h5_fp = os.path.join(SANDBOX_DIR, 'train.h5')
    train_annotations_fp = os.path.join(SANDBOX_DIR, 'train_annotations.txt')
    val_h5_fp = os.path.join(SANDBOX_DIR, 'val.h5')

    train.obs[[CELL_TYPE_KEY]].to_csv(train_annotations_fp, sep='\t', index=True, header=False)

    subprocess.check_output(('python', ACTINN_FORMAT, '-i', train_counts_fp,
                            '-o', train_h5_fp.replace('.h5', ''), '-f', 'txt'))
    subprocess.check_output(('python', ACTINN_FORMAT, '-i', val_counts_fp,
                            '-o', val_h5_fp.replace('.h5', ''), '-f', 'txt'))
    # dont use probablity argument or it breaks
    subprocess.check_output(('python', ACTINN_PREDICT, '-trs', train_h5_fp,
                            '-trl', train_annotations_fp, '-ts', val_h5_fp))
    
    prediction_df = pd.read_csv('predicted_label.txt', sep='\t')
    
    df = pd.DataFrame.from_dict({
        'cell_id': prediction_df['cellname'].to_list(),
        'predicted': prediction_df['celltype'].to_list(),
        'probability': [np.nan] * prediction_df.shape[0]
    })
    
    df = pd.merge(df, val.obs, left_on='cell_id', right_index=True)
    df = df[['cell_id', 'cell_type', 'predicted', 'probability']]
    df.columns = ['cell_id', 'groundtruth', 'predicted', 'probability']
    
    return df
    
    
    
    


In [10]:
ACTINN_FORMAT = '/home/estorrs/ACTINN/actinn_format.py'
ACTINN_PREDICT = '/home/estorrs/ACTINN/actinn_predict.py'

run_workflow_for_datasets(adata_map, run_actinn_workflow, 'actinn', RESULTS_DIR)

scRNAseq brca (2600, 27131) (11253, 27131)
scRNAseq cesc (1941, 22928) (8449, 22928)
scRNAseq hnscc (2200, 26929) (10288, 26929)
scRNAseq melanoma (2000, 23452) (6735, 23452)
scRNAseq pbmc (940, 32738) (1698, 32738)
scRNAseq pdac (3296, 28756) (15435, 28756)
snATACseq brca (2064, 19891) (9028, 19891)
snATACseq gbm (1316, 19891) (5650, 19891)
snRNAseq brca (2455, 29175) (9490, 29175)
snRNAseq ccrcc (2113, 33538) (8605, 33538)
snRNAseq gbm (1689, 29748) (6810, 29748)


###### testing stuff

In [None]:
train, val = adata_map['scRNAseq']['pbmc']['train'].copy(), adata_map['scRNAseq']['pbmc']['val'].copy()

In [None]:
# train.obs['dataset'] = ['train'] * train.shape[0]
# val.obs['dataset'] = ['val'] * val.shape[0]
# combined = train.concatenate(val)
# combined

In [None]:
train_counts_df = pd.DataFrame(data=train.X.transpose().toarray(), index=train.var.index.to_list(),
                        columns=train.obs.index.to_list())
val_counts_df = pd.DataFrame(data=val.X.transpose().toarray(), index=val.var.index.to_list(),
                        columns=val.obs.index.to_list())
train_counts_df

In [None]:
train_counts_fp = os.path.join(SANDBOX_DIR, 'train_counts.txt')
val_counts_fp = os.path.join(SANDBOX_DIR, 'val_counts.txt')
train_counts_df.to_csv(train_counts_fp, sep='\t')
val_counts_df.to_csv(val_counts_fp, sep='\t')

python actinn_format.py -i input_file -o output_prefix -f format

python actinn_format.py -i ./test_data/train_set.txt.gz -o train_set -f txt


In [None]:
train_h5_fp = os.path.join(SANDBOX_DIR, 'train.h5')
train_annotations_fp = os.path.join(SANDBOX_DIR, 'train_annotations.txt')
val_h5_fp = os.path.join(SANDBOX_DIR, 'val.h5')

train.obs[[CELL_TYPE_KEY]].to_csv(train_annotations_fp, sep='\t', index=True, header=False)

subprocess.check_output(('python', '/home/estorrs/ACTINN/actinn_format.py', '-i', train_counts_fp,
                        '-o', train_h5_fp.replace('.h5', ''), '-f', 'txt'))

In [None]:
subprocess.check_output(('python', '/home/estorrs/ACTINN/actinn_format.py', '-i', val_counts_fp,
                        '-o', val_h5_fp.replace('.h5', ''), '-f', 'txt'))

In [None]:
train.obs[[CELL_TYPE_KEY]]

python actinn_predict.py -trs training_set -trl training_label -ts test_set -lr learning_rat -ne num_epoch -ms minibatch_size -pc print_cost -op output_probability


-trs Path to the training set, must be HDF5 format with key "dge".

-trl Path to the training label (the cell types for the training set), must be tab separated text file with no column and row names.

-ts Path to test sets, must be HDF5 format with key "dge".

-lr Learning rate (default: 0.0001). We can increase the learning rate if the cost drops too slow, or decrease the learning rate if the cost drops super fast in the beginning and starts to fluctuate in later epochs.

-ne Number of epochs (default: 50). The number of epochs can be determined by looking at the cost after each epoch. If the cost starts to decrease very slowly after ceartain epoch, then the "ne" parameter should be set to that epoch number.

-ms Minibatch size (default: 128). This parameter can be set larger when training a large dataset.

-pc Print cost (default: True). Whether to print cost after each 5 epochs.

-op Output probabilities for each cell being the cell types in the training data (default: False).


In [None]:
subprocess.check_output(('python', '/home/estorrs/ACTINN/actinn_predict.py', '-trs', train_h5_fp,
                        '-trl', train_annotations_fp, '-ts', val_h5_fp))

In [None]:
' '.join(('python', '/home/estorrs/ACTINN/actinn_predict.py', '-trs', train_h5_fp,
                        '-trl', train_annotations_fp, '-ts', val_h5_fp,
                        '-op', 'True'))

In [None]:
prediction_df = pd.read_csv('predicted_label.txt', sep='\t')
prediction_df

In [None]:
df = pd.DataFrame.from_dict({
        'cell_id': prediction_df['cellname'].to_list(),
        'prediction': prediction_df['celltype'].to_list(),
        'probability': [np.nan] * val.shape[0]
    })
df

In [None]:
val.obs

In [None]:
df = pd.merge(df, val.obs, left_on='cell_id', right_index=True)
df = df[['cell_id', 'cell_type', 'prediction', 'probability']]
df.columns = ['cell_id', 'groundtruth', 'prediction', 'probability']
df


##### Seurat

In [105]:
def run_seurat_transfer(train, val, cell_type_key):
    # save the input data for the seurat script
    train_counts_fp, val_counts_fp = (os.path.join(SANDBOX_DIR, 'train_counts.txt'),
                                        os.path.join(SANDBOX_DIR, 'val_counts.txt'))
    train_annotations_fp, val_annotations_fp = (os.path.join(SANDBOX_DIR, 'train_annotations.txt'),
                                                os.path.join(SANDBOX_DIR, 'val_annotations.txt'))

    ## prepare train and val count matrices
    X = train.X.toarray() if 'sparse' in str(type(train.X)) else train.X
    train_counts = pd.DataFrame(data=X.transpose().astype(np.int32), index=train.var.index,
                                columns=train.obs.index)
    train_counts.index.name = ''
    # for some reason SCTransform fails if the integer values are too high, so capping them here
    cap = pow(2, 14)
    train_counts.values[train_counts.values>cap] = cap
    train_counts.to_csv(train_counts_fp, sep='\t', header=True, index=True)
    
    X = val.X.toarray() if 'sparse' in str(type(val.X)) else val.X
    val_counts = pd.DataFrame(data=X.transpose().astype(np.int32), index=val.var.index,
                                columns=val.obs.index)
    val_counts.index.name = ''
    val_counts.values[val_counts.values>cap] = cap
    val_counts.to_csv(val_counts_fp, sep='\t', header=True, index=True)

    train.obs[[CELL_TYPE_KEY]].to_csv(train_annotations_fp, sep='\t', header=False, index=False)
    val.obs[[CELL_TYPE_KEY]].to_csv(val_annotations_fp, sep='\t', header=False, index=False)
    
    # actually run the script and read the results back in
    prediction_fp = os.path.join(SANDBOX_DIR, 'seurat_predictions.txt')
    try:
        subprocess.check_output(('Rscript', SEURAT_SCRIPT, train_counts_fp, train_annotations_fp,
                            val_counts_fp, val_annotations_fp, prediction_fp))
    except subprocess.CalledProcessError as e:
        print(f'called process error', e)
        return pd.DataFrame()
    
    # format the predictions dataframe
    df = pd.read_csv(prediction_fp, sep='\t')
    df.index = [x.replace('.', '-') for x in df.index]
    # also remove that weird X thing seurat sometimes puts there if first char is _
    df.index = [x[1:] if x[:2]=='X_' else x for x in df.index]
    df = pd.merge(df, val.obs, left_index=True, right_index=True)
    df['cell_id'] = df.index.to_list()
    try:
        df = df[['cell_id', 'cell_type', 'predicted.id', 'prediction.score.max']]        
        df.columns = ['cell_id', 'groundtruth', 'predicted', 'probability']
        return df
    except KeyError as e:
        print(f'key error', e)
        return pd.DataFrame()

In [108]:
SEURAT_SCRIPT = '/home/estorrs/pollock/benchmarking/tools/run_seurat_workflow.R'
run_workflow_for_datasets(adata_map, run_seurat_transfer, 'seurat_transfer', RESULTS_DIR)

scRNAseq brca (2600, 27131) (11253, 27131)
scRNAseq cesc (1941, 22928) (8449, 22928)
scRNAseq hnscc (2200, 26929) (10288, 26929)
scRNAseq melanoma (2000, 23452) (6735, 23452)
called process error Command '('Rscript', '/home/estorrs/pollock/benchmarking/tools/run_seurat_workflow.R', '/home/estorrs/pollock/benchmarking/sandbox/train_counts.txt', '/home/estorrs/pollock/benchmarking/sandbox/train_annotations.txt', '/home/estorrs/pollock/benchmarking/sandbox/val_counts.txt', '/home/estorrs/pollock/benchmarking/sandbox/val_annotations.txt', '/home/estorrs/pollock/benchmarking/sandbox/seurat_predictions.txt')' returned non-zero exit status 1.
scRNAseq pbmc (940, 32738) (1698, 32738)
scRNAseq pdac (3296, 28756) (15435, 28756)
snATACseq brca (2064, 19891) (9028, 19891)
key error "['predicted.id', 'prediction.score.max'] not in index"
snATACseq gbm (1316, 19891) (5650, 19891)
key error "['predicted.id', 'prediction.score.max'] not in index"
snRNAseq brca (2455, 29175) (9490, 29175)
snRNAseq ccrc

In [106]:
run_workflow_for_cross_disease(adata_map, run_seurat_transfer, 'seurat_transfer', RESULTS_CROSS_DISEASE_DIR)

scRNAseq brca_train_cesc_val (2600, 27131) (8449, 22928)
scRNAseq brca_train_hnscc_val (2600, 27131) (10288, 26929)
scRNAseq brca_train_melanoma_val (2600, 27131) (6735, 23452)
called process error Command '('Rscript', '/home/estorrs/pollock/benchmarking/tools/run_seurat_workflow.R', '/home/estorrs/pollock/benchmarking/sandbox/train_counts.txt', '/home/estorrs/pollock/benchmarking/sandbox/train_annotations.txt', '/home/estorrs/pollock/benchmarking/sandbox/val_counts.txt', '/home/estorrs/pollock/benchmarking/sandbox/val_annotations.txt', '/home/estorrs/pollock/benchmarking/sandbox/seurat_predictions.txt')' returned non-zero exit status 1.
scRNAseq brca_train_pbmc_val (2600, 27131) (1698, 32738)
scRNAseq brca_train_pdac_val (2600, 27131) (15435, 28756)
scRNAseq cesc_train_brca_val (1941, 22928) (11253, 27131)
scRNAseq cesc_train_hnscc_val (1941, 22928) (10288, 26929)
scRNAseq cesc_train_melanoma_val (1941, 22928) (6735, 23452)
called process error Command '('Rscript', '/home/estorrs/poll

In [107]:
run_workflow_for_cross_datatype(adata_map, run_seurat_transfer, 'seurat_transfer', RESULTS_CROSS_DTYPE_DIR)

snRNAseq brca_train_brca_val (2600, 27131) (9028, 19891)
key error "['predicted.id', 'prediction.score.max'] not in index"
snRNAseq brca_train_gbm_val (2600, 27131) (5650, 19891)
key error "['predicted.id', 'prediction.score.max'] not in index"
snRNAseq cesc_train_brca_val (1941, 22928) (9028, 19891)
key error "['predicted.id', 'prediction.score.max'] not in index"
snRNAseq cesc_train_gbm_val (1941, 22928) (5650, 19891)
key error "['predicted.id', 'prediction.score.max'] not in index"
snRNAseq hnscc_train_brca_val (2200, 26929) (9028, 19891)
key error "['predicted.id', 'prediction.score.max'] not in index"
snRNAseq hnscc_train_gbm_val (2200, 26929) (5650, 19891)
key error "['predicted.id', 'prediction.score.max'] not in index"
snRNAseq melanoma_train_brca_val (2000, 23452) (9028, 19891)
called process error Command '('Rscript', '/home/estorrs/pollock/benchmarking/tools/run_seurat_workflow.R', '/home/estorrs/pollock/benchmarking/sandbox/train_counts.txt', '/home/estorrs/pollock/benchmar

snRNAseq gbm_train_brca_val (1316, 19891) (11253, 27131)
called process error Command '('Rscript', '/home/estorrs/pollock/benchmarking/tools/run_seurat_workflow.R', '/home/estorrs/pollock/benchmarking/sandbox/train_counts.txt', '/home/estorrs/pollock/benchmarking/sandbox/train_annotations.txt', '/home/estorrs/pollock/benchmarking/sandbox/val_counts.txt', '/home/estorrs/pollock/benchmarking/sandbox/val_annotations.txt', '/home/estorrs/pollock/benchmarking/sandbox/seurat_predictions.txt')' returned non-zero exit status 1.
snRNAseq gbm_train_cesc_val (1316, 19891) (8449, 22928)
snRNAseq gbm_train_hnscc_val (1316, 19891) (10288, 26929)
called process error Command '('Rscript', '/home/estorrs/pollock/benchmarking/tools/run_seurat_workflow.R', '/home/estorrs/pollock/benchmarking/sandbox/train_counts.txt', '/home/estorrs/pollock/benchmarking/sandbox/train_annotations.txt', '/home/estorrs/pollock/benchmarking/sandbox/val_counts.txt', '/home/estorrs/pollock/benchmarking/sandbox/val_annotations.

###### testing stuff

In [55]:
# train, val = adata_map['scRNAseq']['pbmc']['train'].copy(), adata_map['scRNAseq']['pbmc']['val'].copy()
train, val = sc.read_h5ad(adata_map['scRNAseq']['brca']['train']), sc.read_h5ad(adata_map['scRNAseq']['brca']['val'])

In [56]:
pow(2, 14)

16384

In [57]:
# save the input data for the seurat script
train_counts_fp, val_counts_fp = (os.path.join(SANDBOX_DIR, 'train_counts.txt'),
                                    os.path.join(SANDBOX_DIR, 'val_counts.txt'))
train_annotations_fp, val_annotations_fp = (os.path.join(SANDBOX_DIR, 'train_annotations.txt'),
                                            os.path.join(SANDBOX_DIR, 'val_annotations.txt'))

## prepare train and val count matrices
X = train.X.toarray() if 'sparse' in str(type(train.X)) else train.X
train_counts = pd.DataFrame(data=X.transpose().astype(np.int32), index=train.var.index,
                            columns=train.obs.index)
train_counts.index.name = ''
# for some reason SCTransform fails if the integer values are too high, so capping them here
cap = pow(2, 14)
train_counts.values[train_counts.values>cap] = cap
train_counts.to_csv(train_counts_fp, sep='\t', header=True, index=True)

X = val.X.toarray() if 'sparse' in str(type(val.X)) else val.X
val_counts = pd.DataFrame(data=X.transpose().astype(np.int32), index=val.var.index,
                            columns=val.obs.index)
val_counts.index.name = ''
val_counts.values[val_counts.values>cap] = cap
val_counts.to_csv(val_counts_fp, sep='\t', header=True, index=True)

train.obs[[CELL_TYPE_KEY]].to_csv(train_annotations_fp, sep='\t', header=False, index=False)
val.obs[[CELL_TYPE_KEY]].to_csv(val_annotations_fp, sep='\t', header=False, index=False)

In [58]:
train_counts

Unnamed: 0,_HT062B1_S1PA_AACCATGTCTCTGGTC-1,_HT062B1_S1PA_AAGCGAGAGAACAGGA-1,_HT062B1_S1PA_AAGTCGTCACAGAGAC-1,_HT062B1_S1PA_AATGACCTCAGGACAG-1,_HT062B1_S1PA_AATGGCTAGCACGGAT-1,_HT062B1_S1PA_ACATTTCGTACTAACC-1,_HT062B1_S1PA_ACGTCCTGTGTATACC-1,_HT062B1_S1PA_ACTGTGAGTGTGGACA-1,_HT062B1_S1PA_ACTTCGCCACAAGCAG-1,_HT062B1_S1PA_ATACTTCGTACAAGTA-1,...,_HT171B1_BC2_TTCGCTGGTATGGTAA-1,_HT171B1_BC2_TTCTAGTCAAGTTCCA-1,_HT171B1_BC2_TTCTTGACATGACTGT-1,_HT171B1_BC2_TTGAACGCAAACCACT-1,_HT171B1_BC2_TTGAACGTCAAGCCCG-1,_HT171B1_BC2_TTGAACGTCGGATACT-1,_HT171B1_BC2_TTGAGTGGTCCAATCA-1,_HT171B1_BC2_TTGATGGCAGAGTGTG-1,_HT171B1_BC2_TTGGGATTCGATGCAT-1,_HT171B1_BC2_TTTCAGTGTTCGGTAT-1
,,,,,,,,,,,,,,,,,,,,,
AL627309.1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AL627309.3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AL627309.4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AL732372.1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AL669831.2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AL354822.1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AC004556.1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
AC233755.2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
train_counts

In [None]:
type(train_counts.values), type(train_counts.values[0, 0])

In [None]:
vals = sorted(set(train_counts.values.flatten()))
vals

In [None]:
vals[:10], vals[-10:]

In [None]:
train_counts.values[train_counts.values>1000] = 1000

In [None]:
np.where(train_counts>1)

In [59]:
# actually run the script and read the results back in
prediction_fp = os.path.join(SANDBOX_DIR, 'seurat_predictions.txt')
subprocess.check_output(('Rscript', SEURAT_SCRIPT, train_counts_fp, train_annotations_fp,
                    val_counts_fp, val_annotations_fp, prediction_fp))



In [65]:
# format the predictions dataframe
df = pd.read_csv(prediction_fp, sep='\t')
df.index = [x.replace('.', '-') for x in df.index]
# also remove that weird X thing seurat sometimes puts there
df.index = [x[1:] if x[:2]=='X_' else x for x in df.index]
df = pd.merge(df, val.obs, left_index=True, right_index=True)
df['cell_id'] = df.index.to_list()
df = df[['cell_id', 'cell_type', 'predicted.id', 'prediction.score.max']]        
df.columns = ['cell_id', 'groundtruth', 'predicted', 'probability']
df

Unnamed: 0,cell_id,groundtruth,predicted,probability
_HT062B1_S1PA_AAACGAATCGTGGCGT-1,_HT062B1_S1PA_AAACGAATCGTGGCGT-1,CD8 T cell,CD8 T cell,0.928408
_HT062B1_S1PA_AAAGAACAGCTATCTG-1,_HT062B1_S1PA_AAAGAACAGCTATCTG-1,NK,NK,1.000000
_HT062B1_S1PA_AAAGGATTCTGCCTGT-1,_HT062B1_S1PA_AAAGGATTCTGCCTGT-1,Endothelial,Endothelial,1.000000
_HT062B1_S1PA_AAAGGTAGTGGAAGTC-1,_HT062B1_S1PA_AAAGGTAGTGGAAGTC-1,Treg,Treg,0.951956
_HT062B1_S1PA_AACAACCTCACTCACC-1,_HT062B1_S1PA_AACAACCTCACTCACC-1,Endothelial,Endothelial,1.000000
...,...,...,...,...
_HT171B1_BC2_TTTCGATGTGTAGGAC-1,_HT171B1_BC2_TTTCGATGTGTAGGAC-1,CD4 T cell,CD4 T cell,0.916704
_HT171B1_BC2_TTTGACTGTCGATTTG-1,_HT171B1_BC2_TTTGACTGTCGATTTG-1,CD8 T cell,CD8 T cell,0.931814
_HT171B1_BC2_TTTGACTGTCGGTGAA-1,_HT171B1_BC2_TTTGACTGTCGGTGAA-1,CD4 T cell,CD4 T cell,0.914046
_HT171B1_BC2_TTTGGTTCATAGACTC-1,_HT171B1_BC2_TTTGGTTCATAGACTC-1,CD4 T cell,CD4 T cell,0.801086


In [61]:
val.obs

Unnamed: 0,orig.ident,nCount_RNA,nFeature_RNA,percent.mito,nCount_SCT,nFeature_SCT,SCT_snn_res.0.5,seurat_clusters,sample,tissue_type,cell_type,cell_type_specific,Piece_ID,Clinical_Subtype,Bulk_PAM50,doublet_score,predicted_doublet,ident
_HT062B1_S1PA_AAACGAATCGTGGCGT-1,TWCE-HT062B1-S1PAA1A1Z1B1,3098.0,1172,0.096191,3487.0,1172,2,2,HT062B1,Tumor tissue,CD8 T cell,NK,HT062B1_S1PA,TNBC,Her2,0.030261,0,2
_HT062B1_S1PA_AAAGAACAGCTATCTG-1,TWCE-HT062B1-S1PAA1A1Z1B1,4761.0,1779,0.051250,4226.0,1779,11,11,HT062B1,Tumor tissue,NK,NK,HT062B1_S1PA,TNBC,Her2,0.018460,0,11
_HT062B1_S1PA_AAAGGATTCTGCCTGT-1,TWCE-HT062B1-S1PAA1A1Z1B1,74559.0,9098,0.003527,4737.0,2302,4,4,HT062B1,Tumor tissue,Endothelial,Endothelial,HT062B1_S1PA,TNBC,Her2,0.083898,0,4
_HT062B1_S1PA_AAAGGTAGTGGAAGTC-1,TWCE-HT062B1-S1PAA1A1Z1B1,3204.0,1250,0.031835,3514.0,1250,0,0,HT062B1,Tumor tissue,Treg,Treg,HT062B1_S1PA,TNBC,Her2,0.030261,0,0
_HT062B1_S1PA_AACAACCTCACTCACC-1,TWCE-HT062B1-S1PAA1A1Z1B1,15285.0,4271,0.054498,4281.0,1968,4,4,HT062B1,Tumor tissue,Endothelial,Endothelial,HT062B1_S1PA,TNBC,Her2,0.031609,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
_HT171B1_BC2_TTTCGATGTGTAGGAC-1,TWCE-HT171B1-BC2,3090.0,1231,0.030421,3479.0,1231,0,0,HT171B1,Tumor tissue,CD4 T cell,CD4_T,HT171B1_S1H8,TNBC,Basal,0.066288,0,0
_HT171B1_BC2_TTTGACTGTCGATTTG-1,TWCE-HT171B1-BC2,3594.0,1549,0.059265,3664.0,1549,8,8,HT171B1,Tumor tissue,CD8 T cell,NK,HT171B1_S1H8,TNBC,Basal,0.053524,0,8
_HT171B1_BC2_TTTGACTGTCGGTGAA-1,TWCE-HT171B1-BC2,1751.0,715,0.083952,3437.0,725,0,0,HT171B1,Tumor tissue,CD4 T cell,CD4_T,HT171B1_S1H8,TNBC,Basal,0.016301,0,0
_HT171B1_BC2_TTTGGTTCATAGACTC-1,TWCE-HT171B1-BC2,3185.0,1283,0.058713,3511.0,1283,0,0,HT171B1,Tumor tissue,CD4 T cell,CD4_T,HT171B1_S1H8,TNBC,Basal,0.072103,0,0


##### SingleCellNet

In [2]:
# !pip install git+https://github.com/pcahan1/PySingleCellNet/

In [109]:
import pySingleCellNet as pySCN

In [110]:
def run_SingleCellNet(train, val, cell_type_key):
    # save the input data for the seurat script
    cgenesA, xpairs, tspRF = pySCN.scn_train(train,
            nTopGenes=100, nRand=100, nTrees=1000, nTopGenePairs=100,
            dLevel=cell_type_key, stratify=True, limitToHVG=True, )
    predictions = pySCN.scn_classify(val, cgenesA, xpairs, tspRF, nrand = 0)
    
    df = pd.merge(predictions.obs[['SCN_class']], val.obs, left_index=True, right_index=True)
    
    df = df[['cell_type', 'SCN_class']]
    df.columns = ['groundtruth', 'predicted']
    df['cell_id'] = df.index.to_list()
    df['probability'] = [np.nan] * df.shape[0]
    df = df[['cell_id', 'groundtruth', 'predicted', 'probability']]
    

    return df

In [111]:
run_workflow_for_datasets(adata_map, run_SingleCellNet, 'SingleCellNet', RESULTS_DIR)

scRNAseq brca (2600, 27131) (11253, 27131)
HVG
Matrix normalized
There are  1104  classification genes

B cell
CD4 T cell
CD8 T cell
Dendritic
Endothelial
Erythrocyte
Fibroblast
Malignant
Mast
Monocyte
NK
Plasma
Treg
There are 1270 top gene pairs

Finished pair transforming the data

scRNAseq cesc (1941, 22928) (8449, 22928)
HVG
Matrix normalized
There are  976  classification genes

CD4 T cell
CD8 T cell
Endothelial
Epithelial
Erythrocyte
Fibroblast
Malignant
Mast
Monocyte
NK
Plasma
There are 1073 top gene pairs

Finished pair transforming the data

scRNAseq hnscc (2200, 26929) (10288, 26929)
HVG
Matrix normalized
There are  1100  classification genes

B cell
CD4 T cell
CD8 T cell
Endothelial
Erythrocyte
Malignant
Mast
Monocyte
NK
Plasma
Treg
There are 1073 top gene pairs

Finished pair transforming the data

scRNAseq melanoma (2000, 23452) (6735, 23452)
HVG
Matrix normalized
There are  900  classification genes

B cell
CD4 T cell
CD8 T cell
Dendritic
Fibroblast
Malignant
Monocyte
NK


In [112]:
run_workflow_for_cross_disease(adata_map, run_SingleCellNet, 'SingleCellNet', RESULTS_CROSS_DISEASE_DIR)

scRNAseq brca_train_cesc_val (2600, 27131) (8449, 22928)
HVG
Matrix normalized
There are  1104  classification genes

B cell
CD4 T cell
CD8 T cell
Dendritic
Endothelial
Erythrocyte
Fibroblast
Malignant
Mast
Monocyte
NK
Plasma
Treg
There are 1270 top gene pairs

Finished pair transforming the data

scRNAseq brca_train_hnscc_val (2600, 27131) (10288, 26929)
HVG
Matrix normalized
There are  1104  classification genes

B cell
CD4 T cell
CD8 T cell
Dendritic
Endothelial
Erythrocyte
Fibroblast
Malignant
Mast
Monocyte
NK
Plasma
Treg
There are 1270 top gene pairs

Finished pair transforming the data

scRNAseq brca_train_melanoma_val (2600, 27131) (6735, 23452)
HVG
Matrix normalized
There are  1104  classification genes

B cell
CD4 T cell
CD8 T cell
Dendritic
Endothelial
Erythrocyte
Fibroblast
Malignant
Mast
Monocyte
NK
Plasma
Treg
There are 1270 top gene pairs

Finished pair transforming the data

scRNAseq brca_train_pbmc_val (2600, 27131) (1698, 32738)
HVG
Matrix normalized
There are  1104  c

Treg
Tuft
There are 1637 top gene pairs

Finished pair transforming the data

scRNAseq pdac_train_pbmc_val (3296, 28756) (1698, 32738)
HVG
Matrix normalized
There are  1358  classification genes

Acinar
B cell
CD4 T cell
CD8 T cell
Dendritic
Endothelial
Epithelial
Erythrocyte
Fibroblast
Islet
Malignant
Mast
Monocyte
NK
Plasma
Treg
Tuft
There are 1637 top gene pairs

Finished pair transforming the data

snATACseq brca_train_gbm_val (2064, 19891) (5650, 19891)
HVG
Matrix normalized
There are  1119  classification genes

B cell
CD4 T cell
CD8 T cell
Dendritic
Endothelial
Fibroblast
Malignant
Mast
Monocyte
NK
Treg
There are 1080 top gene pairs

Finished pair transforming the data

snATACseq gbm_train_brca_val (1316, 19891) (9028, 19891)
HVG
Matrix normalized
There are  989  classification genes

B cell
Endothelial
Fibroblast
Malignant
Microglia
Neuron
Oligodendrocytes
T cells
There are 792 top gene pairs

Finished pair transforming the data

snRNAseq brca_train_ccrcc_val (2455, 29175) (860

In [113]:
run_workflow_for_cross_datatype(adata_map, run_SingleCellNet, 'SingleCellNet', RESULTS_CROSS_DTYPE_DIR)

snRNAseq brca_train_brca_val (2600, 27131) (9028, 19891)
HVG
Matrix normalized
There are  1104  classification genes

B cell
CD4 T cell
CD8 T cell
Dendritic
Endothelial
Erythrocyte
Fibroblast
Malignant
Mast
Monocyte
NK
Plasma
Treg
There are 1270 top gene pairs

Finished pair transforming the data

snRNAseq brca_train_gbm_val (2600, 27131) (5650, 19891)
HVG
Matrix normalized
There are  1104  classification genes

B cell
CD4 T cell
CD8 T cell
Dendritic
Endothelial
Erythrocyte
Fibroblast
Malignant
Mast
Monocyte
NK
Plasma
Treg
There are 1270 top gene pairs

Finished pair transforming the data

snRNAseq cesc_train_brca_val (1941, 22928) (9028, 19891)
HVG
Matrix normalized
There are  976  classification genes

CD4 T cell
CD8 T cell
Endothelial
Epithelial
Erythrocyte
Fibroblast
Malignant
Mast
Monocyte
NK
Plasma
There are 1073 top gene pairs

Finished pair transforming the data

snRNAseq cesc_train_gbm_val (1941, 22928) (5650, 19891)
HVG
Matrix normalized
There are  976  classification genes



Finished pair transforming the data

snRNAseq pdac_train_gbm_val (3296, 28756) (6810, 29748)
HVG
Matrix normalized
There are  1358  classification genes

Acinar
B cell
CD4 T cell
CD8 T cell
Dendritic
Endothelial
Epithelial
Erythrocyte
Fibroblast
Islet
Malignant
Mast
Monocyte
NK
Plasma
Treg
Tuft
There are 1637 top gene pairs

Finished pair transforming the data

snRNAseq brca_train_brca_val (2064, 19891) (11253, 27131)
HVG
Matrix normalized
There are  1119  classification genes

B cell
CD4 T cell
CD8 T cell
Dendritic
Endothelial
Fibroblast
Malignant
Mast
Monocyte
NK
Treg
There are 1080 top gene pairs

Finished pair transforming the data

snRNAseq brca_train_cesc_val (2064, 19891) (8449, 22928)
HVG
Matrix normalized
There are  1119  classification genes

B cell
CD4 T cell
CD8 T cell
Dendritic
Endothelial
Fibroblast
Malignant
Mast
Monocyte
NK
Treg
There are 1080 top gene pairs

Finished pair transforming the data

snRNAseq brca_train_hnscc_val (2064, 19891) (10288, 26929)
HVG
Matrix norma

snRNAseq ccrcc_train_pbmc_val (2113, 33538) (1698, 32738)
HVG
Matrix normalized
There are  1142  classification genes

CD4 T cell
CD8 T cell
Dendritic
Endothelial
Epithelial
Fibroblast
Malignant
Monocyte
NK
Plasma
Treg
There are 1074 top gene pairs

Finished pair transforming the data

snRNAseq ccrcc_train_pdac_val (2113, 33538) (15435, 28756)
HVG
Matrix normalized
There are  1142  classification genes

CD4 T cell
CD8 T cell
Dendritic
Endothelial
Epithelial
NK
Plasma
Treg
There are 1074 top gene pairs

Finished pair transforming the data

snRNAseq gbm_train_brca_val (1689, 29748) (11253, 27131)
HVG
Matrix normalized
There are  1005  classification genes

B cell
Endothelial
Fibroblast
Malignant
Microglia
Monocyte
Neuron
Oligodendrocytes
T cells
There are 891 top gene pairs

Finished pair transforming the data

snRNAseq gbm_train_cesc_val (1689, 29748) (8449, 22928)
HVG
Matrix normalized
There are  1005  classification genes

B cell
Endothelial
Fibroblast
Malignant
Microglia
Monocyte
Neu

###### testing stuff

In [None]:
train, val = adata_map['scRNAseq']['pbmc']['train'].copy(), adata_map['scRNAseq']['pbmc']['val'].copy()

In [None]:
cgenesA, xpairs, tspRF = pySCN.scn_train(train,
            nTopGenes = 100, nRand = 100, nTrees = 1000 ,nTopGenePairs = 100,
            dLevel = "cell_type", stratify=True, limitToHVG=True, )

In [None]:
predictions = pySCN.scn_classify(val, cgenesA, xpairs, tspRF, nrand = 0)


In [None]:
predictions.obs

In [None]:
df = pd.merge(predictions.obs[['SCN_class']], val.obs, left_index=True, right_index=True)

df = df[['cell_type', 'SCN_class']]
df.index.name = 'cell_id'
df.columns = ['groundtruth', 'predictions']
df['probability'] = [np.nan] * df.shape[0]
df

##### pollock

###### testing stuff