In [1]:
from pathlib import Path
from collections import Counter
import os
import re
import random
import subprocess

import anndata
import scanpy as sc
import pandas as pd
import numpy as np

import mgitools.os_helpers as os_helpers

In [2]:
# !pip install git+https://github.com/estorrs/mgitools
# !pip install tensorflow==2.1.0

In [3]:
%load_ext autoreload

In [4]:
%autoreload 2

In [5]:
# !pip install -e /home/estorrs/pollock/
import pollock
from pollock.models.model import PollockDataset, PollockModel, load_from_directory, predict_from_anndata

In [6]:
# !conda install -y scanpy

In [7]:
# !pip install git+https://github.com/estorrs/mgitools

In [8]:
CELL_TYPE_KEY = 'cell_type'
N_PER_CELL_TYPE = 200
DATA_DIR = '/home/estorrs/pollock/benchmarking/data/10232020_harmonized/teir_1/'
RESULTS_DIR = '/home/estorrs/pollock/benchmarking/results/10272020_teir1'
RESULTS_CROSS_DISEASE_DIR = '/home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_disease'
RESULTS_CROSS_DTYPE_DIR = '/home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype'
SANDBOX_DIR = '/home/estorrs/pollock/benchmarking/sandbox'

Path(RESULTS_DIR).mkdir(parents=True, exist_ok=True)
Path(RESULTS_CROSS_DISEASE_DIR).mkdir(parents=True, exist_ok=True)
Path(RESULTS_CROSS_DTYPE_DIR).mkdir(parents=True, exist_ok=True)

##### create training and validation datasets

only run if you haven't created these datasets yet

In [None]:
def cap_list(ls, n=100, split=.8, oversample=True):
    """
    Grabs items from a pool.
    
    if split * pool size is greater than n, then just randomly sample 80% of the pool
    otherwise sample 80% of the pool, then oversample so you end up with a final size of n
    """
    # just return list if it is of length 1
    if len(ls) <= 1: return ls
    cap = int(len(ls) * split)
    if cap > n:
        return random.sample(ls, n)

    if oversample:
        pool = random.sample(ls, cap)
        ## oversample to
        return random.choices(pool, k=n)

    return random.sample(ls, cap)

def balancedish_training_generator(adata, cell_type_key, n_per_cell_type, oversample=True, split=.8):
    """
    Return balanced train and validation sets
    """
    cell_type_to_idxs = {}
    for cell_id, cell_type in zip(adata.obs.index, adata.obs[cell_type_key]):
        if cell_type not in cell_type_to_idxs:
            cell_type_to_idxs[cell_type] = [cell_id]
        else:
            cell_type_to_idxs[cell_type].append(cell_id)

    cell_type_to_idxs = {k:cap_list(ls, n_per_cell_type, oversample=oversample, split=split)
                         for k, ls in cell_type_to_idxs.items()}

    train_ids = np.asarray([x for ls in cell_type_to_idxs.values() for x in ls])
    train_idxs = np.arange(adata.shape[0])[np.isin(np.asarray(adata.obs.index), train_ids)]
    val_idxs = np.delete(np.arange(adata.shape[0]), train_idxs)

    train_adata = adata[train_idxs, :]
    val_adata = adata[val_idxs, :]

    return train_adata, val_adata

# def create_train_val_datasets(adata, cell_type_key, oversample=True):
#     counts = Counter(adata.obs[cell_type_key])
#     min_count = counts.most_common()[-1][1]
#     n_per_cell_type = max(min_count, )
#     train_adata, val_adata = balancedish_training_generator(adata, cell_type_key,
#                                                             n_per_cell_type, oversample=oversample)
#     return train_adata, val_adata

In [None]:
fps = sorted(os_helpers.listfiles(DATA_DIR, regex='.h5ad$'))
fp_map = {fp.split('/')[-2]:{} for fp in fps}
for fp in fps:
    if '/_train.h5ad' not in fp and '/_val.h5ad' not in fp:
        dtype = fp.split('/')[-2]
        disease = fp.split('/')[-1].replace('.h5ad', '')
        fp_map[dtype][disease] = fp
fp_map

In [None]:
for dtype, d in fp_map.items():
    for disease, fp in d.items():
        print(dtype, disease)
        adata = sc.read_h5ad(fp)
        # check for cell type key
        if CELL_TYPE_KEY not in adata.obs: raise RuntimeError(f'{CELL_TYPE_KEY} not in {fp}')
        
        train_adata, val_adata = balancedish_training_generator(adata, CELL_TYPE_KEY, N_PER_CELL_TYPE)
        # resample validation data to make dataset smaller while keeping rare cell types
        val_adata, _ = balancedish_training_generator(val_adata, CELL_TYPE_KEY, 1000, oversample=False,
                                                     split=1.)
        train_adata.write_h5ad(fp.replace('.h5ad', '_train.h5ad'))
        val_adata.write_h5ad(fp.replace('.h5ad', '_val.h5ad'))
        

##### load in training and validation datasets

In [9]:
fps = sorted(os_helpers.listfiles(DATA_DIR, regex='.h5ad$'))
adata_map = {fp.split('/')[-2]:{} for fp in fps}
for fp in fps:
    dtype = fp.split('/')[-2]
    disease = re.sub(r'^(.*)((_train)|(_val)).h5ad$', r'\1', fp.split('/')[-1])
    if disease not in adata_map[dtype] and '.h5ad' not in disease: adata_map[dtype][disease] = {}
    if 'train.h5ad' in fp:
        adata_map[dtype][disease]['train'] = fp
    if 'val.h5ad' in fp:
        adata_map[dtype][disease]['val'] = fp
for dtype, d in adata_map.items():
    for disease, m in d.items():
        print(dtype, disease)

scRNAseq brca
scRNAseq cesc
scRNAseq hnscc
scRNAseq melanoma
scRNAseq pbmc
scRNAseq pdac
snATACseq brca
snATACseq ccrcc
snATACseq gbm
snRNAseq brca
snRNAseq ccrcc
snRNAseq gbm


In [10]:
adata_map['snATACseq'].pop('ccrcc')

{'train': '/home/estorrs/pollock/benchmarking/data/10232020_harmonized/teir_1/snATACseq/ccrcc_train.h5ad',
 'val': '/home/estorrs/pollock/benchmarking/data/10232020_harmonized/teir_1/snATACseq/ccrcc_val.h5ad'}

### run workflows

In [11]:
def run_workflow_for_datasets(adata_map, workflow, workflow_identifier, output_dir):
    for dtype, d in adata_map.items():
#         if dtype != 'snATACseq':
        for disease, m in d.items():
            # make dir if doesnt exist yet
            directory = os.path.join(output_dir, dtype, disease)
            Path(directory).mkdir(parents=True, exist_ok=True)
            train, val = sc.read_h5ad(m['train']), sc.read_h5ad(m['val'])

            print(dtype, disease, train.shape, val.shape)
            run_workflow(workflow, workflow_identifier,
                train, val, directory)
            
def run_workflow_for_cross_disease(adata_map, workflow, workflow_identifier, output_dir):
    for dtype, d in adata_map.items():
        for disease1, m1 in d.items():
            for disease2, m2 in d.items():
                if disease1 != disease2:
                    # make dir if doesnt exist yet
                    directory = os.path.join(output_dir, dtype, f'{disease1}_train_{disease2}_val')
                    Path(directory).mkdir(parents=True, exist_ok=True)
                    train, val = sc.read_h5ad(m1['train']), sc.read_h5ad(m2['val'])

                    print(dtype, f'{disease1}_train_{disease2}_val', train.shape, val.shape)
                    run_workflow(workflow, workflow_identifier,
                        train, val, directory)
                    
                    
def run_workflow_for_cross_datatype(adata_map, workflow, workflow_identifier, output_dir):
    for dtype1, d1 in adata_map.items():
        for dtype2, d2 in adata_map.items():
            for disease1, m1 in d1.items():
                for disease2, m2 in d2.items():
                    if dtype1 != dtype2:
                        # make dir if doesnt exist yet
                        directory = os.path.join(output_dir, dtype, f'{dtype1}_{disease1}_train_{dtype2}_{disease2}_val')
                        Path(directory).mkdir(parents=True, exist_ok=True)
                        train, val = sc.read_h5ad(m1['train']), sc.read_h5ad(m2['val'])

                        print(dtype, f'{dtype1}_{disease1}_train_{dtype2}_{disease2}_val', train.shape, val.shape)
                        run_workflow(workflow, workflow_identifier,
                            train, val, directory)

def run_workflow(workflow, workflow_identifier, train, val, output_dir):
    """
    Run the workflow defined by the workflow function.
    
    workflow function takes a train adata and a val adata as inputs,
    and returns dataframe with cell_id, groundtruth, predicted, and probability columns
    """
    # if it is pollock it needs to know where to save the module
    if workflow_identifier == 'pollock':
        df = workflow(train, val, CELL_TYPE_KEY, os.path.join(output_dir, f'{workflow_identifier}_module'))
    else:
        df = workflow(train, val, CELL_TYPE_KEY)
    df.to_csv(os.path.join(output_dir, f'{workflow_identifier}.tsv'), sep='\t', index=False, header=True)

##### pollock

In [40]:
a = set(['a', 'b', 'c', 'd'])
b = set(['b', 'd', 'c', 'e'])
a.intersection(b)
a.union(b)

{'a', 'b', 'c', 'd', 'e'}

In [45]:
def run_pollock_workflow(train, val, cell_type_key, module_fp):
    train.obs['is_validation'] = [False] * train.shape[0]
    val.obs['is_validation'] = [True] * val.shape[0]
    combined = train.concatenate(val)
    
    pds = PollockDataset(combined.copy(), cell_type_key=cell_type_key,
                     dataset_type='training', validation_key='is_validation')
    
    pm = PollockModel(pds.cell_types, pds.train_adata.shape[1], alpha=.0001, latent_dim=25)
    
    pm.fit(pds, epochs=20)
    
    # only score validation if cell types match
    train_cells = set(train.obs[cell_type_key])
    val_cells = set(val.obs[cell_type_key])
    score_val = True if len(train_cells.intersection(val_cells)) == len(train_cells.union(val_cells)) else False
    print(score_val)
    pm.save(pds, module_fp, score_train=True, score_val=score_val)

    preds = predict_from_anndata(val.copy(), module_fp, adata_batch_size=10000)
    
    df = pd.DataFrame.from_dict({
        'cell_id': preds.index.to_list(),
        'groundtruth': val.obs.loc[preds.index][cell_type_key].to_list(),
        'predicted': preds['predicted_cell_type'],
        'probability': preds['cell_type_probability']
    })

    return df

In [46]:
# run_workflow_for_datasets(adata_map, run_pollock_workflow, 'pollock', RESULTS_DIR)

In [47]:
# run_workflow_for_cross_disease(adata_map, run_pollock_workflow, 'pollock', RESULTS_CROSS_DISEASE_DIR)

In [52]:
run_workflow_for_cross_datatype(adata_map, run_pollock_workflow, 'pollock', RESULTS_CROSS_DTYPE_DIR)

snRNAseq scRNAseq_brca_train_snATACseq_brca_val (2600, 27131) (9028, 19891)


2020-11-12 12:53:26,986 input dataset shape: (11628, 17565)
2020-11-12 12:53:26,988 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 12:53:26,989 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 12:53:27,009 train shape: (2600, 17565), val shape: (9028, 17565)
2020-11-12 12:53:27,010 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 12:53:27,011 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:53:38,123 epoch: 1, train loss: 14.148760795593262, val loss: 112.47914123535156
  if not is_categorical(df_full[k]):
  if not is_categorica

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:55:09,609 epoch: 13, train loss: 11.504253387451172, val loss: 92.74026489257812
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:55:17,234 epoch: 14, train loss: 11.456613540649414, val loss: 92.35957336425781
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:55:24,831 epoch: 15, train loss: 11.401772499084473, val loss: 92.06980895996094
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:55:32,457 epoch: 16, train loss: 11.35120964050293, val loss: 91.33361053466797
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:55:40,194 epoch: 17, train loss: 11.304377555847168, val loss: 91.21499633789062
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:55:47,758 epoch: 18, train loss: 11.249260902404785, val loss: 90.88

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 12:56:08,624 0 genes in training set are missing from prediction set
2020-11-12 12:56:09,111 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 12:56:09,188 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_brca_train_snATACseq_brca_val/pollock_module
2020-11-12 12:56:09,189 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Erythrocyte' 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma'
 'Treg']
2020-11-12 12:56:09,379 0 genes in training set are missing from prediction set
2020-11-12 12:56:44,023 (9028, 13)
2020-11-12 12:56:44,024 {'Monocyte', 'Endothelial', 'Treg', 'Malignant', 'Dendritic', 'B cell', 'Erythrocyte', 'Fibroblast', 'Mast', 'Plasma', 'NK'}


snRNAseq scRNAseq_brca_train_snATACseq_gbm_val (2600, 27131) (5650, 19891)


2020-11-12 12:56:50,748 input dataset shape: (8250, 17565)
2020-11-12 12:56:50,750 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 12:56:50,751 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 12:56:50,769 train shape: (2600, 17565), val shape: (5650, 17565)
2020-11-12 12:56:50,771 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 12:56:50,772 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):




2020-11-12 12:56:57,941 5 out of the last 50 calls to <function compute_loss at 0x7fbfea59c3b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
  if not is_categorical(df_full[k]):
2020-11-12 12:56:59,283 epoch: 1, train loss: 15.155159950256348, val loss: 35.349430084228516
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:57:05,619 epoch: 2, train loss: 14.320100784301758, val loss: 32.50354766845703
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:57:11,827 epoch: 3, train loss: 14.200750350952148, val loss

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:58:19,398 epoch: 14, train loss: 12.479121208190918, val loss: 30.325542449951172
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:58:25,475 epoch: 15, train loss: 12.428862571716309, val loss: 30.23723602294922
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:58:31,527 epoch: 16, train loss: 12.367630004882812, val loss: 30.27475929260254
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:58:37,711 epoch: 17, train loss: 12.321467399597168, val loss: 30.229286193847656
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:58:43,782 epoch: 18, train loss: 12.268769264221191, val loss: 30.209583282470703
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:58:49,899 epoch: 19, train loss: 12.220223426818848, val loss: 3

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 12:59:01,085 0 genes in training set are missing from prediction set
2020-11-12 12:59:01,487 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 12:59:01,557 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_brca_train_snATACseq_gbm_val/pollock_module
2020-11-12 12:59:01,558 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Erythrocyte' 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma'
 'Treg']
2020-11-12 12:59:01,675 0 genes in training set are missing from prediction set
2020-11-12 12:59:22,224 (5650, 13)
2020-11-12 12:59:22,225 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Malignant', 'Dendritic', 'B cell', 'Fibroblast', 'Plasma', 'NK'}


snRNAseq scRNAseq_cesc_train_snATACseq_brca_val (1941, 22928) (9028, 19891)


2020-11-12 12:59:31,213 input dataset shape: (10969, 16509)
2020-11-12 12:59:31,216 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 12:59:31,217 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 12:59:31,246 train shape: (1941, 16509), val shape: (9028, 16509)
2020-11-12 12:59:31,248 train labels: ['CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma']
2020-11-12 12:59:31,249 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 12:59:38,574 epoch: 1, train loss: 19.727582931518555, val loss: 95.818359375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:00:41,432 epoch: 13, train loss: 16.256467819213867, val loss: 83.27252197265625
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:00:46,612 epoch: 14, train loss: 16.13418197631836, val loss: 82.97295379638672
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:00:51,764 epoch: 15, train loss: 16.06208038330078, val loss: 82.79888916015625
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:00:56,853 epoch: 16, train loss: 16.007375717163086, val loss: 82.58894348144531
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:01:02,043 epoch: 17, train loss: 15.935098648071289, val loss: 82.4585189819336
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:01:07,282 epoch: 18, train loss: 15.860664367675781, val loss: 82.3445

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:01:22,484 0 genes in training set are missing from prediction set
2020-11-12 13:01:22,889 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:01:22,951 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_cesc_train_snATACseq_brca_val/pollock_module
2020-11-12 13:01:22,952 ['CD4 T cell' 'CD8 T cell' 'Endothelial' 'Epithelial' 'Erythrocyte'
 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma']
2020-11-12 13:01:23,136 0 genes in training set are missing from prediction set
2020-11-12 13:01:59,548 (9028, 11)
2020-11-12 13:01:59,548 {'Monocyte', 'Endothelial', 'CD4 T cell', 'CD8 T cell', 'Malignant', 'Fibroblast', 'Mast', 'Plasma', 'Epithelial'}


snRNAseq scRNAseq_cesc_train_snATACseq_gbm_val (1941, 22928) (5650, 19891)


2020-11-12 13:02:05,495 input dataset shape: (7591, 16509)
2020-11-12 13:02:05,498 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells']
2020-11-12 13:02:05,499 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:02:05,515 train shape: (1941, 16509), val shape: (5650, 16509)
2020-11-12 13:02:05,516 train labels: ['CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma']
2020-11-12 13:02:05,517 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:02:11,923 epoch: 1, train loss: 21.458484649658203, val loss: 30.824466705322266
  if not is_categorical(df_full[k]):
  if not is_ca

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:03:07,850 epoch: 13, train loss: 17.55634307861328, val loss: 27.370681762695312
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:03:12,478 epoch: 14, train loss: 17.556720733642578, val loss: 27.3211669921875
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:03:17,157 epoch: 15, train loss: 17.387252807617188, val loss: 27.24822425842285
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:03:21,723 epoch: 16, train loss: 17.27630043029785, val loss: 27.209815979003906
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:03:26,319 epoch: 17, train loss: 17.219758987426758, val loss: 27.179840087890625
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:03:30,943 epoch: 18, train loss: 17.161897659301758, val loss: 27.1

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:03:44,170 0 genes in training set are missing from prediction set
2020-11-12 13:03:44,479 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:03:44,535 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_cesc_train_snATACseq_gbm_val/pollock_module
2020-11-12 13:03:44,536 ['CD4 T cell' 'CD8 T cell' 'Endothelial' 'Epithelial' 'Erythrocyte'
 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma']
2020-11-12 13:03:44,636 0 genes in training set are missing from prediction set
2020-11-12 13:04:05,966 (5650, 11)
2020-11-12 13:04:05,967 {'Monocyte', 'Endothelial', 'CD4 T cell', 'CD8 T cell', 'Malignant', 'Fibroblast', 'Erythrocyte', 'Plasma', 'Epithelial'}


snRNAseq scRNAseq_hnscc_train_snATACseq_brca_val (2200, 26929) (9028, 19891)


2020-11-12 13:04:15,494 input dataset shape: (11228, 17615)
2020-11-12 13:04:15,496 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:04:15,497 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:04:15,518 train shape: (2200, 17615), val shape: (9028, 17615)
2020-11-12 13:04:15,520 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:04:15,521 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:04:23,499 epoch: 1, train loss: 15.698946952819824, val loss: 98.63905334472656
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:05:33,171 epoch: 13, train loss: 12.814157485961914, val loss: 87.10173034667969
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:05:38,928 epoch: 14, train loss: 12.83912181854248, val loss: 86.98909759521484
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:05:44,680 epoch: 15, train loss: 12.698729515075684, val loss: 86.81157684326172
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:05:50,475 epoch: 16, train loss: 12.627863883972168, val loss: 86.6489486694336
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:05:56,194 epoch: 17, train loss: 12.577813148498535, val loss: 86.44992065429688
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:06:01,908 epoch: 18, train loss: 12.570856094360352, val loss: 86.221

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:06:18,893 0 genes in training set are missing from prediction set
2020-11-12 13:06:19,296 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:06:19,362 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_hnscc_train_snATACseq_brca_val/pollock_module
2020-11-12 13:06:19,363 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Endothelial' 'Erythrocyte'
 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 13:06:19,550 0 genes in training set are missing from prediction set
2020-11-12 13:06:55,652 (9028, 11)
2020-11-12 13:06:55,654 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'Malignant', 'B cell', 'Erythrocyte', 'Plasma'}


snRNAseq scRNAseq_hnscc_train_snATACseq_gbm_val (2200, 26929) (5650, 19891)


2020-11-12 13:07:02,222 input dataset shape: (7850, 17615)
2020-11-12 13:07:02,224 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 13:07:02,225 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:07:02,238 train shape: (2200, 17615), val shape: (5650, 17615)
2020-11-12 13:07:02,240 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:07:02,240 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):




2020-11-12 13:07:08,300 5 out of the last 43 calls to <function compute_loss at 0x7fbfea59c3b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
  if not is_categorical(df_full[k]):
2020-11-12 13:07:09,367 epoch: 1, train loss: 16.912015914916992, val loss: 31.947193145751953
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:07:14,577 epoch: 2, train loss: 15.931706428527832, val loss: 30.945762634277344
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:07:19,738 epoch: 3, train loss: 15.810691833496094, val los

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:08:16,341 epoch: 14, train loss: 14.006688117980957, val loss: 28.808042526245117
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:08:21,481 epoch: 15, train loss: 13.953377723693848, val loss: 28.807296752929688
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:08:26,570 epoch: 16, train loss: 13.864034652709961, val loss: 28.786693572998047
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:08:31,719 epoch: 17, train loss: 13.776205062866211, val loss: 28.812501907348633
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:08:36,882 epoch: 18, train loss: 13.753978729248047, val loss: 28.812397003173828
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:08:41,986 epoch: 19, train loss: 13.669191360473633, val loss:

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:08:51,647 0 genes in training set are missing from prediction set
2020-11-12 13:08:52,110 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:08:52,169 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_hnscc_train_snATACseq_gbm_val/pollock_module
2020-11-12 13:08:52,170 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Endothelial' 'Erythrocyte'
 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 13:08:52,285 0 genes in training set are missing from prediction set
2020-11-12 13:09:14,404 (5650, 11)
2020-11-12 13:09:14,405 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'Malignant', 'B cell', 'Plasma', 'NK'}


snRNAseq scRNAseq_melanoma_train_snATACseq_brca_val (2000, 23452) (9028, 19891)


2020-11-12 13:09:23,311 input dataset shape: (11028, 16275)
2020-11-12 13:09:23,313 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:09:23,314 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:09:23,334 train shape: (2000, 16275), val shape: (9028, 16275)
2020-11-12 13:09:23,336 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:09:23,336 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:09:30,692 epoch: 1, train loss: 14.030254364013672, val loss: 102.9658432006836
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:09:36,037 epoch: 2, train

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:10:33,173 epoch: 13, train loss: 9.539754867553711, val loss: 93.7333984375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:10:38,376 epoch: 14, train loss: 9.491135597229004, val loss: 93.7606430053711
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:10:43,624 epoch: 15, train loss: 9.401084899902344, val loss: 93.48091888427734
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:10:48,864 epoch: 16, train loss: 9.385795593261719, val loss: 92.90782928466797
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:10:54,108 epoch: 17, train loss: 9.317974090576172, val loss: 92.86665344238281
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:10:59,240 epoch: 18, train loss: 9.29171085357666, val loss: 92.4864425659179

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:11:14,241 0 genes in training set are missing from prediction set
2020-11-12 13:11:14,580 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:11:14,647 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_melanoma_train_snATACseq_brca_val/pollock_module
2020-11-12 13:11:14,648 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Fibroblast' 'Malignant'
 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 13:11:14,790 0 genes in training set are missing from prediction set
2020-11-12 13:11:50,040 (9028, 10)
2020-11-12 13:11:50,042 {'Monocyte', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'B cell', 'Fibroblast', 'Plasma', 'NK'}


snRNAseq scRNAseq_melanoma_train_snATACseq_gbm_val (2000, 23452) (5650, 19891)


2020-11-12 13:11:56,869 input dataset shape: (7650, 16275)
2020-11-12 13:11:56,873 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 13:11:56,875 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:11:56,898 train shape: (2000, 16275), val shape: (5650, 16275)
2020-11-12 13:11:56,900 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:11:56,901 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:12:03,349 epoch: 1, train loss: 14.799345970153809, val loss: 34.46803665161133
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:1

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:12:57,607 epoch: 13, train loss: 10.373287200927734, val loss: 30.81534194946289
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:13:02,082 epoch: 14, train loss: 10.349776268005371, val loss: 30.71155548095703
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:13:06,635 epoch: 15, train loss: 10.341835021972656, val loss: 30.671064376831055
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:13:11,169 epoch: 16, train loss: 10.293456077575684, val loss: 30.618833541870117
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:13:15,604 epoch: 17, train loss: 10.244905471801758, val loss: 30.58670997619629
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:13:20,166 epoch: 18, train loss: 10.138236045837402, val loss: 30

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:13:32,992 0 genes in training set are missing from prediction set
2020-11-12 13:13:33,444 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:13:33,501 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_melanoma_train_snATACseq_gbm_val/pollock_module
2020-11-12 13:13:33,502 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Fibroblast' 'Malignant'
 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 13:13:33,585 0 genes in training set are missing from prediction set
2020-11-12 13:13:53,803 (5650, 10)
2020-11-12 13:13:53,804 {'Monocyte', 'Malignant', 'Dendritic', 'B cell', 'Fibroblast', 'Plasma'}


snRNAseq scRNAseq_pbmc_train_snATACseq_brca_val (940, 32738) (9028, 19891)


2020-11-12 13:14:05,591 input dataset shape: (9968, 18919)
2020-11-12 13:14:05,594 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Treg']
2020-11-12 13:14:05,595 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:14:05,613 train shape: (940, 18919), val shape: (9028, 18919)
2020-11-12 13:14:05,615 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Megakaryocyte', 'Monocyte', 'NK']
2020-11-12 13:14:05,616 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:14:11,959 epoch: 1, train loss: 9.464248657226562, val loss: 104.01560974121094
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:14:16,024 epoch: 2, train loss: 8.45656013488769

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:14:54,675 epoch: 13, train loss: 6.155814170837402, val loss: 118.42164611816406
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:14:58,231 epoch: 14, train loss: 6.08473014831543, val loss: 119.12492370605469
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:15:01,763 epoch: 15, train loss: 6.002589225769043, val loss: 118.47379302978516
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:15:05,312 epoch: 16, train loss: 5.955848217010498, val loss: 118.38456726074219
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:15:08,853 epoch: 17, train loss: 5.935043811798096, val loss: 117.41747283935547
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:15:12,386 epoch: 18, train loss: 5.902036190032959, val loss: 116.88

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:15:23,551 0 genes in training set are missing from prediction set
2020-11-12 13:15:24,023 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:15:24,080 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_pbmc_train_snATACseq_brca_val/pollock_module
2020-11-12 13:15:24,081 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Megakaryocyte' 'Monocyte'
 'NK']
2020-11-12 13:15:24,214 0 genes in training set are missing from prediction set
2020-11-12 13:16:03,717 (9028, 7)
2020-11-12 13:16:03,721 {'Monocyte', 'CD8 T cell', 'Dendritic', 'B cell', 'NK'}


snRNAseq scRNAseq_pbmc_train_snATACseq_gbm_val (940, 32738) (5650, 19891)


2020-11-12 13:16:09,680 input dataset shape: (6590, 18919)
2020-11-12 13:16:09,683 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Megakaryocyte', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 13:16:09,684 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:16:09,695 train shape: (940, 18919), val shape: (5650, 18919)
2020-11-12 13:16:09,697 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Megakaryocyte', 'Monocyte', 'NK']
2020-11-12 13:16:09,698 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):




2020-11-12 13:16:13,853 5 out of the last 22 calls to <function compute_loss at 0x7fbfea59c3b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.




2020-11-12 13:16:13,943 6 out of the last 23 calls to <function compute_loss at 0x7fbfea59c3b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
  if not is_categorical(df_full[k]):
2020-11-12 13:16:14,566 epoch: 1, train loss: 10.023908615112305, val loss: 34.77587127685547
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:16:17,427 epoch: 2, train loss: 8.883955955505371, val loss: 33.68832015991211
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:16:20,323 epoch: 3, train loss: 7.594048500061035, val loss: 3

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:16:51,308 epoch: 14, train loss: 6.606990337371826, val loss: 36.844032287597656
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:16:54,123 epoch: 15, train loss: 6.5328369140625, val loss: 36.90156173706055
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:16:56,914 epoch: 16, train loss: 6.4414801597595215, val loss: 36.76554489135742
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:16:59,720 epoch: 17, train loss: 6.402044296264648, val loss: 36.53397750854492
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:17:02,502 epoch: 18, train loss: 6.368263244628906, val loss: 36.56194305419922
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:17:05,291 epoch: 19, train loss: 6.334833145141602, val loss: 36.4484024

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:17:11,137 0 genes in training set are missing from prediction set
2020-11-12 13:17:11,604 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:17:11,653 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_pbmc_train_snATACseq_gbm_val/pollock_module
2020-11-12 13:17:11,654 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Megakaryocyte' 'Monocyte'
 'NK']
2020-11-12 13:17:11,735 0 genes in training set are missing from prediction set
2020-11-12 13:17:35,610 (5650, 7)
2020-11-12 13:17:35,612 {'Monocyte', 'CD8 T cell', 'Dendritic', 'B cell', 'NK'}


snRNAseq scRNAseq_pdac_train_snATACseq_brca_val (3296, 28756) (9028, 19891)


2020-11-12 13:17:49,828 input dataset shape: (12324, 17904)
2020-11-12 13:17:49,833 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-12 13:17:49,834 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:17:49,856 train shape: (3296, 17904), val shape: (9028, 17904)
2020-11-12 13:17:49,857 train labels: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-12 13:17:49,858 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:18:01,342 epoch: 1, train loss: 14.770638465881348, val lo

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:19:38,432 epoch: 13, train loss: 12.29512882232666, val loss: 87.2849349975586
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:19:46,457 epoch: 14, train loss: 12.21860408782959, val loss: 87.12706756591797
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:19:54,408 epoch: 15, train loss: 12.159963607788086, val loss: 87.2307357788086
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:20:02,359 epoch: 16, train loss: 12.093198776245117, val loss: 87.09492492675781
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:20:10,286 epoch: 17, train loss: 12.046549797058105, val loss: 86.93444061279297
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:20:18,071 epoch: 18, train loss: 12.005940437316895, val loss: 87.08102

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:20:40,320 0 genes in training set are missing from prediction set
2020-11-12 13:20:40,810 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:20:40,900 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_pdac_train_snATACseq_brca_val/pollock_module
2020-11-12 13:20:40,901 ['Acinar' 'B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Epithelial' 'Erythrocyte' 'Fibroblast' 'Islet' 'Malignant' 'Mast'
 'Monocyte' 'NK' 'Plasma' 'Treg' 'Tuft']
2020-11-12 13:20:41,091 0 genes in training set are missing from prediction set
2020-11-12 13:21:15,950 (9028, 17)
2020-11-12 13:21:15,952 {'Monocyte', 'Endothelial', 'Treg', 'Acinar', 'CD8 T cell', 'Dendritic', 'Malignant', 'Epithelial', 'Tuft', 'Fibroblast', 'Erythrocyte', 'Plasma', 'Islet', 'NK'}


snRNAseq scRNAseq_pdac_train_snATACseq_gbm_val (3296, 28756) (5650, 19891)


2020-11-12 13:21:23,242 input dataset shape: (8946, 17904)
2020-11-12 13:21:23,244 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg', 'Tuft']
2020-11-12 13:21:23,245 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:21:23,265 train shape: (3296, 17904), val shape: (5650, 17904)
2020-11-12 13:21:23,266 train labels: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-12 13:21:23,267 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:21:32,644 epoch: 1, train 

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:22:55,118 epoch: 13, train loss: 13.453126907348633, val loss: 29.41242027282715
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:23:01,952 epoch: 14, train loss: 13.405933380126953, val loss: 29.379850387573242
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:23:08,607 epoch: 15, train loss: 13.339804649353027, val loss: 29.340347290039062
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:23:15,474 epoch: 16, train loss: 13.266844749450684, val loss: 29.264053344726562
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:23:22,194 epoch: 17, train loss: 13.2191162109375, val loss: 29.191967010498047
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:23:28,953 epoch: 18, train loss: 13.163954734802246, val loss: 29

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:23:48,093 0 genes in training set are missing from prediction set
2020-11-12 13:23:48,560 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:23:48,634 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_pdac_train_snATACseq_gbm_val/pollock_module
2020-11-12 13:23:48,635 ['Acinar' 'B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Epithelial' 'Erythrocyte' 'Fibroblast' 'Islet' 'Malignant' 'Mast'
 'Monocyte' 'NK' 'Plasma' 'Treg' 'Tuft']
2020-11-12 13:23:48,717 0 genes in training set are missing from prediction set
2020-11-12 13:24:09,442 (5650, 17)
2020-11-12 13:24:09,443 {'Monocyte', 'Endothelial', 'Acinar', 'CD8 T cell', 'Malignant', 'Dendritic', 'Epithelial', 'B cell', 'Erythrocyte', 'Fibroblast', 'Plasma', 'Islet', 'NK'}


snRNAseq scRNAseq_brca_train_snRNAseq_brca_val (2600, 27131) (9490, 29175)


2020-11-12 13:24:21,244 input dataset shape: (12090, 25674)
2020-11-12 13:24:21,247 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:24:21,247 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:24:21,261 train shape: (2600, 25674), val shape: (9490, 25674)
2020-11-12 13:24:21,263 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:24:21,264 val labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:24:32,474 epoch: 1, train loss: 30.03271484375, val loss: 54.04741668701172
  if not is_categorical(df_f

2020-11-12 13:26:04,026 epoch: 12, train loss: 26.763952255249023, val loss: 50.38566970825195
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:26:12,453 epoch: 13, train loss: 26.683664321899414, val loss: 50.336673736572266
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:26:20,976 epoch: 14, train loss: 26.56901741027832, val loss: 50.27118682861328
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:26:29,316 epoch: 15, train loss: 26.454654693603516, val loss: 50.244476318359375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:26:37,675 epoch: 16, train loss: 26.352073669433594, val loss: 50.25196838378906
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:26:46,120 epoch: 17, train loss: 26.240346908569336, val loss: 50.28573989868164
  if not is_categorical(df_full[k]):
  if not is_categorica

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:27:18,139 0 genes in training set are missing from prediction set
2020-11-12 13:27:18,726 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:27:18,796 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_brca_train_snRNAseq_brca_val/pollock_module
2020-11-12 13:27:18,797 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Erythrocyte' 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma'
 'Treg']
2020-11-12 13:27:18,862 0 genes in training set are missing from prediction set
2020-11-12 13:27:43,416 (9490, 13)
2020-11-12 13:27:43,417 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'Erythrocyte', 'Fibroblast', 'Mast', 'Plasma', 'NK'}


snRNAseq scRNAseq_brca_train_snRNAseq_ccrcc_val (2600, 27131) (8605, 33538)


2020-11-12 13:27:54,748 input dataset shape: (11205, 27131)
2020-11-12 13:27:54,751 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:27:54,752 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:27:54,766 train shape: (2600, 27131), val shape: (8605, 27131)
2020-11-12 13:27:54,768 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:27:54,768 val labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:28:06,008 epoch: 1, train loss: 33.53917694091797, val loss: 65.31121826171875
  if not is_categorical(df_full[k]):
  if

2020-11-12 13:29:42,155 epoch: 12, train loss: 30.421327590942383, val loss: 61.147274017333984
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:29:50,899 epoch: 13, train loss: 30.312049865722656, val loss: 61.214599609375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:29:59,634 epoch: 14, train loss: 30.164737701416016, val loss: 61.33417510986328
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:30:08,492 epoch: 15, train loss: 30.05417251586914, val loss: 61.30610656738281
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:30:17,201 epoch: 16, train loss: 29.96186637878418, val loss: 61.333274841308594
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:30:25,957 epoch: 17, train loss: 29.859785079956055, val loss: 61.531166076660156
  if not is_categorical(df_full[k]):
  if not is_categorical(

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:30:59,675 0 genes in training set are missing from prediction set
2020-11-12 13:31:00,285 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:31:00,364 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_brca_train_snRNAseq_ccrcc_val/pollock_module
2020-11-12 13:31:00,365 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Erythrocyte' 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma'
 'Treg']
2020-11-12 13:31:00,430 0 genes in training set are missing from prediction set
2020-11-12 13:31:24,146 (8605, 13)
2020-11-12 13:31:24,148 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'B cell', 'Erythrocyte', 'Fibroblast', 'Plasma', 'NK'}


snRNAseq scRNAseq_brca_train_snRNAseq_gbm_val (2600, 27131) (6810, 29748)


2020-11-12 13:31:33,487 input dataset shape: (9410, 25705)
2020-11-12 13:31:33,490 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 13:31:33,491 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:31:33,512 train shape: (2600, 25705), val shape: (6810, 25705)
2020-11-12 13:31:33,514 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:31:33,515 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:31:43,649 epoch: 1, train loss: 32.74538040161133, val loss: 49.0059928894043
  if not is_catego

2020-11-12 13:33:09,029 epoch: 12, train loss: 29.413063049316406, val loss: 46.04637908935547
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:33:16,762 epoch: 13, train loss: 29.306753158569336, val loss: 46.024810791015625
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:33:24,461 epoch: 14, train loss: 29.191587448120117, val loss: 46.02787780761719
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:33:32,386 epoch: 15, train loss: 29.095373153686523, val loss: 46.015357971191406
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:33:40,408 epoch: 16, train loss: 28.95354461669922, val loss: 46.14450454711914
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:33:48,207 epoch: 17, train loss: 28.830507278442383, val loss: 46.166969299316406
  if not is_categorical(df_full[k]):
  if not is_categoric

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:34:18,127 0 genes in training set are missing from prediction set
2020-11-12 13:34:18,689 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:34:18,743 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_brca_train_snRNAseq_gbm_val/pollock_module
2020-11-12 13:34:18,744 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Erythrocyte' 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma'
 'Treg']
2020-11-12 13:34:18,790 0 genes in training set are missing from prediction set
2020-11-12 13:34:36,633 (6810, 13)
2020-11-12 13:34:36,634 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'Malignant', 'Dendritic', 'B cell', 'Fibroblast', 'Erythrocyte', 'Plasma', 'NK'}


snRNAseq scRNAseq_cesc_train_snRNAseq_brca_val (1941, 22928) (9490, 29175)


2020-11-12 13:34:46,525 input dataset shape: (11431, 22001)
2020-11-12 13:34:46,527 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:34:46,528 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:34:46,538 train shape: (1941, 22001), val shape: (9490, 22001)
2020-11-12 13:34:46,540 train labels: ['CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma']
2020-11-12 13:34:46,541 val labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:34:54,950 epoch: 1, train loss: 39.202301025390625, val loss: 47.81297302246094
  if not is_categorical(df_

2020-11-12 13:36:01,885 epoch: 12, train loss: 34.53059005737305, val loss: 44.43861770629883
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:36:08,007 epoch: 13, train loss: 34.22859191894531, val loss: 44.39051055908203
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:36:14,143 epoch: 14, train loss: 34.242210388183594, val loss: 44.43193817138672
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:36:20,241 epoch: 15, train loss: 34.15685272216797, val loss: 44.41658401489258
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:36:26,360 epoch: 16, train loss: 34.03807830810547, val loss: 44.42867660522461
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:36:32,576 epoch: 17, train loss: 33.80657196044922, val loss: 44.435768127441406
  if not is_categorical(df_full[k]):
  if not is_categorical(df_

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:36:56,722 0 genes in training set are missing from prediction set
2020-11-12 13:36:57,254 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:36:57,313 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_cesc_train_snRNAseq_brca_val/pollock_module
2020-11-12 13:36:57,314 ['CD4 T cell' 'CD8 T cell' 'Endothelial' 'Epithelial' 'Erythrocyte'
 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma']
2020-11-12 13:36:57,376 0 genes in training set are missing from prediction set
2020-11-12 13:37:23,202 (9490, 11)
2020-11-12 13:37:23,202 {'Monocyte', 'Endothelial', 'CD4 T cell', 'CD8 T cell', 'Malignant', 'Mast', 'Fibroblast', 'Erythrocyte', 'Plasma', 'Epithelial'}


snRNAseq scRNAseq_cesc_train_snRNAseq_ccrcc_val (1941, 22928) (8605, 33538)


2020-11-12 13:37:32,699 input dataset shape: (10546, 22919)
2020-11-12 13:37:32,701 possible cell types: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:37:32,702 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:37:32,719 train shape: (1941, 22919), val shape: (8605, 22919)
2020-11-12 13:37:32,720 train labels: ['CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma']
2020-11-12 13:37:32,721 val labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:37:41,542 epoch: 1, train loss: 41.570030212402344, val loss: 57.091583251953125
  if not is_categorical(df_full[k]):
  if not is_categorical(df_fu

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:38:56,028 epoch: 13, train loss: 36.934600830078125, val loss: 53.257808685302734
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:39:02,345 epoch: 14, train loss: 36.534446716308594, val loss: 53.34333419799805
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:39:08,397 epoch: 15, train loss: 36.74250411987305, val loss: 53.37657928466797
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:39:14,626 epoch: 16, train loss: 36.515846252441406, val loss: 53.339935302734375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:39:20,935 epoch: 17, train loss: 36.46086502075195, val loss: 53.40251922607422
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:39:27,160 epoch: 18, train loss: 36.117916107177734, val loss: 53.5

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:39:45,620 0 genes in training set are missing from prediction set
2020-11-12 13:39:46,232 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:39:46,301 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_cesc_train_snRNAseq_ccrcc_val/pollock_module
2020-11-12 13:39:46,302 ['CD4 T cell' 'CD8 T cell' 'Endothelial' 'Epithelial' 'Erythrocyte'
 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma']
2020-11-12 13:39:46,392 0 genes in training set are missing from prediction set
2020-11-12 13:40:10,703 (8605, 11)
2020-11-12 13:40:10,705 {'Monocyte', 'Endothelial', 'CD4 T cell', 'CD8 T cell', 'Malignant', 'Mast', 'Fibroblast', 'Erythrocyte', 'Plasma', 'Epithelial', 'NK'}


snRNAseq scRNAseq_cesc_train_snRNAseq_gbm_val (1941, 22928) (6810, 29748)


2020-11-12 13:40:18,605 input dataset shape: (8751, 21981)
2020-11-12 13:40:18,607 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells']
2020-11-12 13:40:18,608 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:40:18,628 train shape: (1941, 21981), val shape: (6810, 21981)
2020-11-12 13:40:18,629 train labels: ['CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma']
2020-11-12 13:40:18,630 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:40:26,308 epoch: 1, train loss: 41.706031799316406, val loss: 43.24296951293945
  if not is_categorical(df_full[k]):
  i

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:41:34,841 epoch: 13, train loss: 36.9448356628418, val loss: 40.031349182128906
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:41:40,462 epoch: 14, train loss: 36.62485885620117, val loss: 39.99913024902344
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:41:45,977 epoch: 15, train loss: 36.41255569458008, val loss: 39.99627685546875
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:41:51,691 epoch: 16, train loss: 36.26914596557617, val loss: 39.983158111572266
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:41:57,425 epoch: 17, train loss: 36.17972183227539, val loss: 40.012508392333984
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:42:03,116 epoch: 18, train loss: 35.91050338745117, val loss: 40.01704

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:42:19,639 0 genes in training set are missing from prediction set
2020-11-12 13:42:20,220 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:42:20,289 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_cesc_train_snRNAseq_gbm_val/pollock_module
2020-11-12 13:42:20,290 ['CD4 T cell' 'CD8 T cell' 'Endothelial' 'Epithelial' 'Erythrocyte'
 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma']
2020-11-12 13:42:20,360 0 genes in training set are missing from prediction set
2020-11-12 13:42:38,945 (6810, 11)
2020-11-12 13:42:38,946 {'Monocyte', 'Endothelial', 'CD4 T cell', 'CD8 T cell', 'Malignant', 'Mast', 'Fibroblast', 'Plasma', 'Epithelial'}


snRNAseq scRNAseq_hnscc_train_snRNAseq_brca_val (2200, 26929) (9490, 29175)


2020-11-12 13:42:50,375 input dataset shape: (11690, 25299)
2020-11-12 13:42:50,377 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:42:50,378 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:42:50,391 train shape: (2200, 25299), val shape: (9490, 25299)
2020-11-12 13:42:50,393 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:42:50,394 val labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:43:00,747 epoch: 1, train loss: 32.1065788269043, val loss: 50.526092529296875
  if not is_categorical(df_full[k]):
  if not is_cat

2020-11-12 13:44:23,843 epoch: 12, train loss: 28.75851058959961, val loss: 47.32429504394531
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:44:31,472 epoch: 13, train loss: 28.434701919555664, val loss: 47.248191833496094
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:44:39,116 epoch: 14, train loss: 28.33710479736328, val loss: 47.254215240478516
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:44:46,588 epoch: 15, train loss: 28.18842124938965, val loss: 47.238182067871094
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:44:54,025 epoch: 16, train loss: 28.076242446899414, val loss: 47.231597900390625
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:45:01,320 epoch: 17, train loss: 27.985027313232422, val loss: 47.24986267089844
  if not is_categorical(df_full[k]):
  if not is_categorica

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:45:30,663 0 genes in training set are missing from prediction set
2020-11-12 13:45:31,284 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:45:31,645 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_hnscc_train_snRNAseq_brca_val/pollock_module
2020-11-12 13:45:31,646 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Endothelial' 'Erythrocyte'
 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 13:45:31,727 0 genes in training set are missing from prediction set
2020-11-12 13:45:58,498 (9490, 11)
2020-11-12 13:45:58,498 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Malignant', 'B cell', 'Mast', 'Erythrocyte', 'Plasma'}


snRNAseq scRNAseq_hnscc_train_snRNAseq_ccrcc_val (2200, 26929) (8605, 33538)


2020-11-12 13:46:09,511 input dataset shape: (10805, 26918)
2020-11-12 13:46:09,513 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:46:09,514 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:46:09,527 train shape: (2200, 26918), val shape: (8605, 26918)
2020-11-12 13:46:09,529 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:46:09,530 val labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:46:19,513 epoch: 1, train loss: 34.821434020996094, val loss: 61.58780288696289
  if not is_categorical(df_full[k]):
  if not is_categorical(df_ful

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:47:46,890 epoch: 13, train loss: 31.270179748535156, val loss: 57.38459014892578
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:47:54,270 epoch: 14, train loss: 31.221498489379883, val loss: 57.31968688964844
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:48:01,745 epoch: 15, train loss: 30.986412048339844, val loss: 57.35319137573242
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:48:09,014 epoch: 16, train loss: 30.95348358154297, val loss: 57.52272033691406
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:48:16,555 epoch: 17, train loss: 30.769433975219727, val loss: 57.477149963378906
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:48:23,992 epoch: 18, train loss: 30.782075881958008, val loss: 57.5

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:48:45,952 0 genes in training set are missing from prediction set
2020-11-12 13:48:46,562 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:48:46,630 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_hnscc_train_snRNAseq_ccrcc_val/pollock_module
2020-11-12 13:48:46,631 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Endothelial' 'Erythrocyte'
 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 13:48:46,695 0 genes in training set are missing from prediction set
2020-11-12 13:49:12,598 (8605, 11)
2020-11-12 13:49:12,600 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Malignant', 'B cell', 'Erythrocyte', 'Mast', 'Plasma', 'NK'}


snRNAseq scRNAseq_hnscc_train_snRNAseq_gbm_val (2200, 26929) (6810, 29748)


2020-11-12 13:49:21,783 input dataset shape: (9010, 25389)
2020-11-12 13:49:21,785 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 13:49:21,786 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:49:21,803 train shape: (2200, 25389), val shape: (6810, 25389)
2020-11-12 13:49:21,805 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:49:21,806 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:49:30,896 epoch: 1, train loss: 34.462158203125, val loss: 46.04787063598633
  if not is_categorical(df_full[k]):
  if not is_categorica

2020-11-12 13:50:44,320 epoch: 12, train loss: 31.091136932373047, val loss: 43.371620178222656
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:50:51,022 epoch: 13, train loss: 30.947845458984375, val loss: 43.37408447265625
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:50:57,791 epoch: 14, train loss: 30.77601432800293, val loss: 43.36945343017578
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:51:04,461 epoch: 15, train loss: 30.63218116760254, val loss: 43.35223388671875
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:51:11,216 epoch: 16, train loss: 30.61437225341797, val loss: 43.347259521484375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:51:17,984 epoch: 17, train loss: 30.524351119995117, val loss: 43.36451721191406
  if not is_categorical(df_full[k]):
  if not is_categorical(

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:51:43,863 0 genes in training set are missing from prediction set
2020-11-12 13:51:44,454 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:51:44,523 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_hnscc_train_snRNAseq_gbm_val/pollock_module
2020-11-12 13:51:44,524 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Endothelial' 'Erythrocyte'
 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 13:51:44,574 0 genes in training set are missing from prediction set
2020-11-12 13:52:04,360 (6810, 11)
2020-11-12 13:52:04,362 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Malignant', 'B cell', 'Erythrocyte', 'Plasma', 'NK'}


snRNAseq scRNAseq_melanoma_train_snRNAseq_brca_val (2000, 23452) (9490, 29175)


2020-11-12 13:52:13,707 input dataset shape: (11490, 21018)
2020-11-12 13:52:13,709 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:52:13,710 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:52:13,722 train shape: (2000, 21018), val shape: (9490, 21018)
2020-11-12 13:52:13,725 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:52:13,725 val labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:52:22,029 epoch: 1, train loss: 26.343088150024414, val loss: 53.20616149902344
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
202

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:53:32,588 epoch: 13, train loss: 21.5950984954834, val loss: 50.51701354980469
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:53:38,604 epoch: 14, train loss: 21.442760467529297, val loss: 50.43399429321289
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:53:44,569 epoch: 15, train loss: 21.373075485229492, val loss: 50.4062614440918
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:53:50,474 epoch: 16, train loss: 21.246240615844727, val loss: 50.39027786254883
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:53:56,355 epoch: 17, train loss: 21.170413970947266, val loss: 50.327980041503906
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:54:02,239 epoch: 18, train loss: 21.153915405273438, val loss: 50.358

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:54:19,531 0 genes in training set are missing from prediction set
2020-11-12 13:54:20,453 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:54:20,495 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_melanoma_train_snRNAseq_brca_val/pollock_module
2020-11-12 13:54:20,496 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Fibroblast' 'Malignant'
 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 13:54:20,547 0 genes in training set are missing from prediction set
2020-11-12 13:54:42,173 (9490, 10)
2020-11-12 13:54:42,174 {'Monocyte', 'Treg', 'CD8 T cell', 'Malignant', 'Dendritic', 'B cell', 'Fibroblast', 'Plasma', 'NK'}


snRNAseq scRNAseq_melanoma_train_snRNAseq_ccrcc_val (2000, 23452) (8605, 33538)


2020-11-12 13:54:51,192 input dataset shape: (10605, 21975)
2020-11-12 13:54:51,194 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:54:51,195 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:54:51,213 train shape: (2000, 21975), val shape: (8605, 21975)
2020-11-12 13:54:51,215 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:54:51,215 val labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:54:59,514 epoch: 1, train loss: 30.34039878845215, val loss: 64.51455688476562
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:55:05,560 epoc

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:56:12,641 epoch: 13, train loss: 25.65884017944336, val loss: 61.55281066894531
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:56:18,730 epoch: 14, train loss: 25.545116424560547, val loss: 61.368988037109375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:56:24,789 epoch: 15, train loss: 25.4215145111084, val loss: 61.50493621826172
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:56:30,865 epoch: 16, train loss: 25.22417640686035, val loss: 61.383514404296875
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:56:36,927 epoch: 17, train loss: 25.2484130859375, val loss: 61.41523742675781
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:56:42,972 epoch: 18, train loss: 25.060462951660156, val loss: 61.38792

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:57:00,811 0 genes in training set are missing from prediction set
2020-11-12 13:57:01,336 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:57:01,401 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_melanoma_train_snRNAseq_ccrcc_val/pollock_module
2020-11-12 13:57:01,403 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Fibroblast' 'Malignant'
 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 13:57:01,465 0 genes in training set are missing from prediction set
2020-11-12 13:57:22,128 (8605, 10)
2020-11-12 13:57:22,129 {'Monocyte', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'B cell', 'Fibroblast', 'Plasma', 'NK'}


snRNAseq scRNAseq_melanoma_train_snRNAseq_gbm_val (2000, 23452) (6810, 29748)


2020-11-12 13:57:29,728 input dataset shape: (8810, 21069)
2020-11-12 13:57:29,730 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 13:57:29,731 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:57:29,757 train shape: (2000, 21069), val shape: (6810, 21069)
2020-11-12 13:57:29,758 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:57:29,759 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:57:37,498 epoch: 1, train loss: 28.472896575927734, val loss: 48.63994216918945
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
202

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:58:46,780 epoch: 13, train loss: 23.900968551635742, val loss: 46.267852783203125
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:58:52,717 epoch: 14, train loss: 23.765419006347656, val loss: 46.257049560546875
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:58:58,590 epoch: 15, train loss: 23.596576690673828, val loss: 46.237831115722656
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:59:04,546 epoch: 16, train loss: 23.58051872253418, val loss: 46.204376220703125
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:59:10,272 epoch: 17, train loss: 23.47113037109375, val loss: 46.181617736816406
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 13:59:15,960 epoch: 18, train loss: 23.31307029724121, val loss: 46

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 13:59:32,369 0 genes in training set are missing from prediction set
2020-11-12 13:59:32,888 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 13:59:32,953 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_melanoma_train_snRNAseq_gbm_val/pollock_module
2020-11-12 13:59:32,954 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Fibroblast' 'Malignant'
 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 13:59:33,002 0 genes in training set are missing from prediction set
2020-11-12 13:59:48,626 (6810, 10)
2020-11-12 13:59:48,627 {'Monocyte', 'CD8 T cell', 'Malignant', 'Dendritic', 'Fibroblast', 'Plasma'}


snRNAseq scRNAseq_pbmc_train_snRNAseq_brca_val (940, 32738) (9490, 29175)


2020-11-12 13:59:55,943 input dataset shape: (10430, 18731)
2020-11-12 13:59:55,945 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 13:59:55,946 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 13:59:55,954 train shape: (940, 18731), val shape: (9490, 18731)
2020-11-12 13:59:55,956 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Megakaryocyte', 'Monocyte', 'NK']
2020-11-12 13:59:55,956 val labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):




2020-11-12 14:00:00,796 5 out of the last 12 calls to <function compute_loss at 0x7fbfea59c3b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
  if not is_categorical(df_full[k]):
2020-11-12 14:00:01,673 epoch: 1, train loss: 16.889202117919922, val loss: 46.17963409423828
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:00:05,250 epoch: 2, train loss: 15.52927017211914, val loss: 45.50379180908203
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:00:08,796 epoch: 3, train loss: 14.119014739990234, val loss: 

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:00:48,080 epoch: 14, train loss: 13.01919174194336, val loss: 47.3941650390625
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:00:51,624 epoch: 15, train loss: 12.955965995788574, val loss: 47.422889709472656
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:00:55,158 epoch: 16, train loss: 12.938386917114258, val loss: 47.34306335449219
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:00:58,705 epoch: 17, train loss: 12.921722412109375, val loss: 47.315162658691406
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:01:02,238 epoch: 18, train loss: 12.875513076782227, val loss: 47.2798957824707
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:01:05,781 epoch: 19, train loss: 12.831835746765137, val loss: 47.31

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:01:13,421 0 genes in training set are missing from prediction set
2020-11-12 14:01:13,893 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:01:13,942 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_pbmc_train_snRNAseq_brca_val/pollock_module
2020-11-12 14:01:13,943 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Megakaryocyte' 'Monocyte'
 'NK']
2020-11-12 14:01:14,004 0 genes in training set are missing from prediction set
2020-11-12 14:01:36,125 (9490, 7)
2020-11-12 14:01:36,126 {'Monocyte', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Megakaryocyte', 'B cell', 'NK'}


snRNAseq scRNAseq_pbmc_train_snRNAseq_ccrcc_val (940, 32738) (8605, 33538)


2020-11-12 14:01:46,115 input dataset shape: (9545, 20453)
2020-11-12 14:01:46,117 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 14:01:46,118 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:01:46,129 train shape: (940, 20453), val shape: (8605, 20453)
2020-11-12 14:01:46,131 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Megakaryocyte', 'Monocyte', 'NK']
2020-11-12 14:01:46,132 val labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:01:52,402 epoch: 1, train loss: 20.844329833984375, val loss: 55.95701599121094
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:01:55,975 epoch: 2, train 

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:02:36,571 epoch: 13, train loss: 16.81146240234375, val loss: 58.52430725097656
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:02:40,245 epoch: 14, train loss: 16.777074813842773, val loss: 58.33887481689453
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:02:43,952 epoch: 15, train loss: 16.6740779876709, val loss: 58.186012268066406
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:02:47,668 epoch: 16, train loss: 16.6552734375, val loss: 58.174964904785156
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:02:51,354 epoch: 17, train loss: 16.66354751586914, val loss: 58.20653533935547
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:02:54,996 epoch: 18, train loss: 16.616615295410156, val loss: 58.14786529

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:03:06,634 0 genes in training set are missing from prediction set
2020-11-12 14:03:07,198 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:03:07,240 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_pbmc_train_snRNAseq_ccrcc_val/pollock_module
2020-11-12 14:03:07,241 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Megakaryocyte' 'Monocyte'
 'NK']
2020-11-12 14:03:07,299 0 genes in training set are missing from prediction set
2020-11-12 14:03:27,041 (8605, 7)
2020-11-12 14:03:27,042 {'Monocyte', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Megakaryocyte', 'B cell', 'NK'}


snRNAseq scRNAseq_pbmc_train_snRNAseq_gbm_val (940, 32738) (6810, 29748)


2020-11-12 14:03:32,944 input dataset shape: (7750, 18949)
2020-11-12 14:03:32,946 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Megakaryocyte', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 14:03:32,947 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:03:32,966 train shape: (940, 18949), val shape: (6810, 18949)
2020-11-12 14:03:32,967 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Megakaryocyte', 'Monocyte', 'NK']
2020-11-12 14:03:32,968 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):




2020-11-12 14:03:37,256 5 out of the last 23 calls to <function compute_loss at 0x7fbfea59c3b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
  if not is_categorical(df_full[k]):
2020-11-12 14:03:37,959 epoch: 1, train loss: 18.76492691040039, val loss: 42.80901336669922
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:03:41,032 epoch: 2, train loss: 17.395275115966797, val loss: 42.163177490234375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:03:44,129 epoch: 3, train loss: 15.876825332641602, val loss:

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:04:17,511 epoch: 14, train loss: 14.778563499450684, val loss: 43.63029861450195
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:04:20,490 epoch: 15, train loss: 14.65810489654541, val loss: 43.56562423706055
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:04:23,491 epoch: 16, train loss: 14.692280769348145, val loss: 43.58649826049805
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:04:26,563 epoch: 17, train loss: 14.705354690551758, val loss: 43.49148941040039
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:04:29,621 epoch: 18, train loss: 14.641139030456543, val loss: 43.518585205078125
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:04:32,642 epoch: 19, train loss: 14.59222412109375, val loss: 43.50

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:04:39,183 0 genes in training set are missing from prediction set
2020-11-12 14:04:39,695 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:04:39,755 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_pbmc_train_snRNAseq_gbm_val/pollock_module
2020-11-12 14:04:39,756 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Megakaryocyte' 'Monocyte'
 'NK']
2020-11-12 14:04:39,826 0 genes in training set are missing from prediction set
2020-11-12 14:04:54,883 (6810, 7)
2020-11-12 14:04:54,884 {'Monocyte', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Megakaryocyte', 'B cell', 'NK'}


snRNAseq scRNAseq_pdac_train_snRNAseq_brca_val (3296, 28756) (9490, 29175)


2020-11-12 14:05:07,804 input dataset shape: (12786, 26783)
2020-11-12 14:05:07,808 possible cell types: ['Acinar', 'Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-12 14:05:07,809 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:05:07,826 train shape: (3296, 26783), val shape: (9490, 26783)
2020-11-12 14:05:07,827 train labels: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-12 14:05:07,828 val labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:05:21,819 epoch: 1, tr

2020-11-12 14:07:22,481 epoch: 12, train loss: 27.795549392700195, val loss: 47.803375244140625
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:07:33,462 epoch: 13, train loss: 27.65186309814453, val loss: 47.75993728637695
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:07:44,521 epoch: 14, train loss: 27.561946868896484, val loss: 47.746070861816406
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:07:55,415 epoch: 15, train loss: 27.442930221557617, val loss: 47.81575012207031
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:08:06,540 epoch: 16, train loss: 27.35649871826172, val loss: 47.80686569213867
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:08:17,399 epoch: 17, train loss: 27.252225875854492, val loss: 47.87405776977539
  if not is_categorical(df_full[k]):
  if not is_categorical

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:08:58,880 0 genes in training set are missing from prediction set
2020-11-12 14:08:59,514 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:08:59,574 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_pdac_train_snRNAseq_brca_val/pollock_module
2020-11-12 14:08:59,575 ['Acinar' 'B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Epithelial' 'Erythrocyte' 'Fibroblast' 'Islet' 'Malignant' 'Mast'
 'Monocyte' 'NK' 'Plasma' 'Treg' 'Tuft']
2020-11-12 14:08:59,635 0 genes in training set are missing from prediction set
2020-11-12 14:09:24,938 (9490, 17)
2020-11-12 14:09:24,939 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'Acinar', 'CD8 T cell', 'Malignant', 'Dendritic', 'Epithelial', 'Tuft', 'Erythrocyte', 'Fibroblast', 'B cell', 'Plasma', 'Islet', 'NK'}


snRNAseq scRNAseq_pdac_train_snRNAseq_ccrcc_val (3296, 28756) (8605, 33538)


2020-11-12 14:09:37,923 input dataset shape: (11901, 28756)
2020-11-12 14:09:37,926 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-12 14:09:37,927 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:09:37,942 train shape: (3296, 28756), val shape: (8605, 28756)
2020-11-12 14:09:37,944 train labels: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-12 14:09:37,945 val labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:09:51,642 epoch: 1, train loss: 33.89693832397461, v

2020-11-12 14:11:47,195 epoch: 12, train loss: 30.400495529174805, val loss: 57.954620361328125
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:11:57,786 epoch: 13, train loss: 30.258216857910156, val loss: 58.063232421875
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:12:08,336 epoch: 14, train loss: 30.136049270629883, val loss: 58.084476470947266
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:12:18,673 epoch: 15, train loss: 30.00821876525879, val loss: 58.19329071044922
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:12:29,075 epoch: 16, train loss: 29.88555145263672, val loss: 58.265403747558594
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:12:39,336 epoch: 17, train loss: 29.796194076538086, val loss: 58.40263366699219
  if not is_categorical(df_full[k]):
  if not is_categorical(

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:13:19,200 0 genes in training set are missing from prediction set
2020-11-12 14:13:19,903 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:13:19,999 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_pdac_train_snRNAseq_ccrcc_val/pollock_module
2020-11-12 14:13:20,002 ['Acinar' 'B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Epithelial' 'Erythrocyte' 'Fibroblast' 'Islet' 'Malignant' 'Mast'
 'Monocyte' 'NK' 'Plasma' 'Treg' 'Tuft']
2020-11-12 14:13:20,095 0 genes in training set are missing from prediction set
2020-11-12 14:13:44,316 (8605, 17)
2020-11-12 14:13:44,317 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'Acinar', 'CD8 T cell', 'Malignant', 'Dendritic', 'Erythrocyte', 'Fibroblast', 'Mast', 'Plasma', 'Epithelial', 'NK'}


snRNAseq scRNAseq_pdac_train_snRNAseq_gbm_val (3296, 28756) (6810, 29748)


2020-11-12 14:13:54,776 input dataset shape: (10106, 27015)
2020-11-12 14:13:54,779 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg', 'Tuft']
2020-11-12 14:13:54,780 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:13:54,802 train shape: (3296, 27015), val shape: (6810, 27015)
2020-11-12 14:13:54,804 train labels: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-12 14:13:54,805 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:14:07,645 epo

2020-11-12 14:15:59,513 epoch: 12, train loss: 29.980438232421875, val loss: 43.83639144897461
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:16:09,874 epoch: 13, train loss: 29.846914291381836, val loss: 43.84630584716797
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:16:20,352 epoch: 14, train loss: 29.801502227783203, val loss: 43.82703399658203
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:16:30,791 epoch: 15, train loss: 29.646291732788086, val loss: 43.83257293701172
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:16:41,174 epoch: 16, train loss: 29.541278839111328, val loss: 43.83428192138672
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:16:51,349 epoch: 17, train loss: 29.435577392578125, val loss: 43.90430450439453
  if not is_categorical(df_full[k]):
  if not is_categorical

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:17:30,064 0 genes in training set are missing from prediction set
2020-11-12 14:17:30,715 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:17:30,810 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/scRNAseq_pdac_train_snRNAseq_gbm_val/pollock_module
2020-11-12 14:17:30,811 ['Acinar' 'B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Epithelial' 'Erythrocyte' 'Fibroblast' 'Islet' 'Malignant' 'Mast'
 'Monocyte' 'NK' 'Plasma' 'Treg' 'Tuft']
2020-11-12 14:17:30,883 0 genes in training set are missing from prediction set
2020-11-12 14:17:49,740 (6810, 17)
2020-11-12 14:17:49,741 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'Acinar', 'CD8 T cell', 'Dendritic', 'Malignant', 'Tuft', 'Erythrocyte', 'Fibroblast', 'Plasma', 'Islet', 'NK'}


snRNAseq snATACseq_brca_train_scRNAseq_brca_val (2064, 19891) (11253, 27131)


2020-11-12 14:17:59,115 input dataset shape: (13317, 17565)
2020-11-12 14:17:59,117 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 14:17:59,118 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:17:59,138 train shape: (2064, 17565), val shape: (11253, 17565)
2020-11-12 14:17:59,139 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
2020-11-12 14:17:59,140 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:18:07,368 epoch: 1, train loss: 59.394309997558594, val loss: 23.433025360107422
  if not is_categorical(df_full[k]):
  if not is_categoric

2020-11-12 14:19:12,619 epoch: 12, train loss: 56.11210250854492, val loss: 22.113319396972656
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:19:18,500 epoch: 13, train loss: 56.506065368652344, val loss: 22.093013763427734
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:19:24,388 epoch: 14, train loss: 55.70210266113281, val loss: 22.08348274230957
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:19:30,372 epoch: 15, train loss: 55.918277740478516, val loss: 22.040363311767578
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:19:36,237 epoch: 16, train loss: 55.19590759277344, val loss: 22.06815528869629
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:19:42,232 epoch: 17, train loss: 54.948326110839844, val loss: 22.04745101928711
  if not is_categorical(df_full[k]):
  if not is_categorical

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:20:05,493 0 genes in training set are missing from prediction set
2020-11-12 14:20:05,975 starting batch 1 of 2
  if not is_categorical(df_full[k]):
2020-11-12 14:20:06,051 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_brca_train_scRNAseq_brca_val/pollock_module
2020-11-12 14:20:06,052 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Fibroblast'
 'Malignant' 'Mast' 'Monocyte' 'NK' 'Treg']
2020-11-12 14:20:06,214 0 genes in training set are missing from prediction set
2020-11-12 14:20:25,117 (10000, 11)
2020-11-12 14:20:25,118 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Malignant', 'Dendritic', 'B cell', 'Mast', 'Fibroblast', 'NK'}
2020-11-12 14:20:25,124 starting batch 2 of 2
  if not is_categorical(df_full[k]):
2020-11-12 14:20:25,194 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAs

snRNAseq snATACseq_brca_train_scRNAseq_cesc_val (2064, 19891) (8449, 22928)


2020-11-12 14:20:34,874 input dataset shape: (10513, 16509)
2020-11-12 14:20:34,876 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 14:20:34,877 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:20:34,898 train shape: (2064, 16509), val shape: (8449, 16509)
2020-11-12 14:20:34,900 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
2020-11-12 14:20:34,901 val labels: ['CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:20:42,213 epoch: 1, train loss: 51.8328971862793, val loss: 28.30971908569336
  if not is_categorical(df_full[k]):
  if not is_categorical(df_f

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:21:45,897 epoch: 13, train loss: 48.360496520996094, val loss: 27.026248931884766
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:21:51,179 epoch: 14, train loss: 48.454708099365234, val loss: 26.960580825805664
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:21:56,510 epoch: 15, train loss: 48.08161163330078, val loss: 26.95620346069336
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:22:01,789 epoch: 16, train loss: 47.9471321105957, val loss: 26.923084259033203
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:22:07,135 epoch: 17, train loss: 47.71824645996094, val loss: 26.937088012695312
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:22:12,432 epoch: 18, train loss: 47.54096603393555, val loss: 26.86

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:22:28,048 0 genes in training set are missing from prediction set
2020-11-12 14:22:28,504 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:22:29,036 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_brca_train_scRNAseq_cesc_val/pollock_module
2020-11-12 14:22:29,037 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Fibroblast'
 'Malignant' 'Mast' 'Monocyte' 'NK' 'Treg']
2020-11-12 14:22:29,075 0 genes in training set are missing from prediction set
2020-11-12 14:22:48,186 (8449, 11)
2020-11-12 14:22:48,187 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'B cell', 'Mast', 'Fibroblast', 'NK'}


snRNAseq snATACseq_brca_train_scRNAseq_hnscc_val (2064, 19891) (10288, 26929)


2020-11-12 14:22:56,883 input dataset shape: (12352, 17615)
2020-11-12 14:22:56,885 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 14:22:56,886 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:22:56,907 train shape: (2064, 17615), val shape: (10288, 17615)
2020-11-12 14:22:56,909 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
2020-11-12 14:22:56,910 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:23:04,860 epoch: 1, train loss: 54.67675018310547, val loss: 11.50849723815918
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:24:15,316 epoch: 13, train loss: 51.76173782348633, val loss: 9.640167236328125
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:24:21,174 epoch: 14, train loss: 51.563350677490234, val loss: 9.606536865234375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:24:27,018 epoch: 15, train loss: 51.20090866088867, val loss: 9.593950271606445
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:24:32,815 epoch: 16, train loss: 51.51961898803711, val loss: 9.624401092529297
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:24:38,650 epoch: 17, train loss: 51.10722732543945, val loss: 9.606217384338379
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:24:44,361 epoch: 18, train loss: 50.464012145996094, val loss: 9.597812

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:25:01,695 0 genes in training set are missing from prediction set
2020-11-12 14:25:02,175 starting batch 1 of 2
  if not is_categorical(df_full[k]):
2020-11-12 14:25:02,245 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_brca_train_scRNAseq_hnscc_val/pollock_module
2020-11-12 14:25:02,247 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Fibroblast'
 'Malignant' 'Mast' 'Monocyte' 'NK' 'Treg']
2020-11-12 14:25:02,428 0 genes in training set are missing from prediction set
2020-11-12 14:25:22,970 (10000, 11)
2020-11-12 14:25:22,971 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'B cell', 'Mast', 'Fibroblast', 'NK'}
2020-11-12 14:25:22,978 starting batch 2 of 2
  if not is_categorical(df_full[k]):
2020-11-12 14:25:23,044 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNA

snRNAseq snATACseq_brca_train_scRNAseq_melanoma_val (2064, 19891) (6735, 23452)


2020-11-12 14:25:29,653 input dataset shape: (8799, 16275)
2020-11-12 14:25:29,656 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 14:25:29,656 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:25:29,673 train shape: (2064, 16275), val shape: (6735, 16275)
2020-11-12 14:25:29,675 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
2020-11-12 14:25:29,676 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:25:36,574 epoch: 1, train loss: 59.425506591796875, val loss: 22.0986385345459
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:25:41,722 epoch: 2, train l

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:26:38,293 epoch: 13, train loss: 55.58600616455078, val loss: 20.40456771850586
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:26:43,480 epoch: 14, train loss: 55.87299346923828, val loss: 20.412353515625
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:26:48,665 epoch: 15, train loss: 55.03267288208008, val loss: 20.395832061767578
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:26:53,856 epoch: 16, train loss: 54.60581970214844, val loss: 20.400062561035156
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:26:58,992 epoch: 17, train loss: 54.61094665527344, val loss: 20.398136138916016
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:27:04,121 epoch: 18, train loss: 54.29209518432617, val loss: 20.424329

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:27:18,808 0 genes in training set are missing from prediction set
2020-11-12 14:27:19,270 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:27:19,345 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_brca_train_scRNAseq_melanoma_val/pollock_module
2020-11-12 14:27:19,347 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Fibroblast'
 'Malignant' 'Mast' 'Monocyte' 'NK' 'Treg']
2020-11-12 14:27:19,379 0 genes in training set are missing from prediction set
2020-11-12 14:27:29,809 (6735, 11)
2020-11-12 14:27:29,810 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'B cell', 'Fibroblast', 'Mast', 'NK'}


snRNAseq snATACseq_brca_train_scRNAseq_pbmc_val (2064, 19891) (1698, 32738)


2020-11-12 14:27:32,970 input dataset shape: (3762, 18919)
2020-11-12 14:27:32,971 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Treg']
2020-11-12 14:27:32,972 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:27:32,985 train shape: (2064, 18919), val shape: (1698, 18919)
2020-11-12 14:27:32,987 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
2020-11-12 14:27:32,987 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Megakaryocyte', 'Monocyte', 'NK']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:27:39,286 epoch: 1, train loss: 63.731571197509766, val loss: 13.371467590332031
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:27:44,167 epoch: 2, train loss: 63.06821823120

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:28:37,494 epoch: 13, train loss: 59.95395278930664, val loss: 11.138011932373047
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:28:42,386 epoch: 14, train loss: 59.38349533081055, val loss: 11.047391891479492
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:28:47,243 epoch: 15, train loss: 59.248043060302734, val loss: 10.97459602355957
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:28:52,063 epoch: 16, train loss: 58.916419982910156, val loss: 11.036199569702148
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:28:56,918 epoch: 17, train loss: 58.585391998291016, val loss: 10.948400497436523
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:29:01,757 epoch: 18, train loss: 58.4095573425293, val loss: 11.0

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:29:14,959 0 genes in training set are missing from prediction set
2020-11-12 14:29:15,469 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:29:15,534 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_brca_train_scRNAseq_pbmc_val/pollock_module
2020-11-12 14:29:15,536 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Fibroblast'
 'Malignant' 'Mast' 'Monocyte' 'NK' 'Treg']
2020-11-12 14:29:15,549 0 genes in training set are missing from prediction set
2020-11-12 14:29:18,211 (1698, 11)
2020-11-12 14:29:18,212 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Malignant', 'Dendritic', 'B cell', 'Mast', 'Fibroblast', 'NK'}


snRNAseq snATACseq_brca_train_scRNAseq_pdac_val (2064, 19891) (15435, 28756)


2020-11-12 14:29:30,205 input dataset shape: (17499, 17904)
2020-11-12 14:29:30,209 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-12 14:29:30,209 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:29:30,234 train shape: (2064, 17904), val shape: (15435, 17904)
2020-11-12 14:29:30,236 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
2020-11-12 14:29:30,237 val labels: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:29:38,866 epoch: 1, train loss: 56.07087707519531, val lo

2020-11-12 14:30:44,676 epoch: 12, train loss: 53.1986198425293, val loss: 18.577388763427734
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:30:50,623 epoch: 13, train loss: 52.93228530883789, val loss: 18.558271408081055
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:30:56,667 epoch: 14, train loss: 52.7100830078125, val loss: 18.396873474121094
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:31:02,602 epoch: 15, train loss: 52.5797233581543, val loss: 18.518508911132812
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:31:08,530 epoch: 16, train loss: 51.95805358886719, val loss: 18.45919418334961
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:31:14,502 epoch: 17, train loss: 51.62030029296875, val loss: 18.40766143798828
  if not is_categorical(df_full[k]):
  if not is_categorical(df_f

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:31:40,069 0 genes in training set are missing from prediction set
2020-11-12 14:31:40,350 starting batch 1 of 2
  if not is_categorical(df_full[k]):
2020-11-12 14:31:40,400 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_brca_train_scRNAseq_pdac_val/pollock_module
2020-11-12 14:31:40,401 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Fibroblast'
 'Malignant' 'Mast' 'Monocyte' 'NK' 'Treg']
2020-11-12 14:31:40,495 0 genes in training set are missing from prediction set
2020-11-12 14:32:00,699 (10000, 11)
2020-11-12 14:32:00,701 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'B cell', 'Fibroblast', 'Mast', 'NK'}
2020-11-12 14:32:00,708 starting batch 2 of 2
  if not is_categorical(df_full[k]):
2020-11-12 14:32:00,777 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAs

snRNAseq snATACseq_gbm_train_scRNAseq_brca_val (1316, 19891) (11253, 27131)


2020-11-12 14:32:19,669 input dataset shape: (12569, 17565)
2020-11-12 14:32:19,671 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 14:32:19,672 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:32:19,688 train shape: (1316, 17565), val shape: (11253, 17565)
2020-11-12 14:32:19,690 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 14:32:19,691 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:32:26,068 epoch: 1, train loss: 71.66114807128906, val loss: 28.782745361328125
  if not is_categorical(df

2020-11-12 14:33:11,861 epoch: 12, train loss: 66.58680725097656, val loss: 24.402877807617188
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:33:16,006 epoch: 13, train loss: 66.07767486572266, val loss: 24.35727310180664
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:33:20,116 epoch: 14, train loss: 65.4087142944336, val loss: 24.352602005004883
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:33:24,266 epoch: 15, train loss: 65.06594848632812, val loss: 24.323524475097656
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:33:28,423 epoch: 16, train loss: 65.00625610351562, val loss: 24.362285614013672
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:33:32,534 epoch: 17, train loss: 64.64754486083984, val loss: 24.306737899780273
  if not is_categorical(df_full[k]):
  if not is_categorical(d

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:33:49,815 0 genes in training set are missing from prediction set
2020-11-12 14:33:50,292 starting batch 1 of 2
  if not is_categorical(df_full[k]):
2020-11-12 14:33:50,349 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_gbm_train_scRNAseq_brca_val/pollock_module
2020-11-12 14:33:50,350 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Neuron'
 'Oligodendrocytes' 'T cells']
2020-11-12 14:33:50,455 0 genes in training set are missing from prediction set
2020-11-12 14:34:08,971 (10000, 8)
2020-11-12 14:34:08,972 {'Endothelial', 'Malignant', 'T cells', 'Microglia', 'Fibroblast', 'Oligodendrocytes', 'B cell', 'Neuron'}
2020-11-12 14:34:08,978 starting batch 2 of 2
  if not is_categorical(df_full[k]):
2020-11-12 14:34:09,030 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_gbm_train_scRNAseq_brca_val/

snRNAseq snATACseq_gbm_train_scRNAseq_cesc_val (1316, 19891) (8449, 22928)


2020-11-12 14:34:18,174 input dataset shape: (9765, 16509)
2020-11-12 14:34:18,177 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells']
2020-11-12 14:34:18,177 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:34:18,192 train shape: (1316, 16509), val shape: (8449, 16509)
2020-11-12 14:34:18,194 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 14:34:18,195 val labels: ['CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:34:23,839 epoch: 1, train loss: 61.043853759765625, val loss: 32.806243896484375
  if not is_categorical(df_full[k]):
  if not is_ca

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:35:08,064 epoch: 13, train loss: 56.50913619995117, val loss: 29.14035415649414
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:35:11,723 epoch: 14, train loss: 56.00383758544922, val loss: 29.16150665283203
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:35:15,387 epoch: 15, train loss: 55.66704559326172, val loss: 29.199703216552734
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:35:19,038 epoch: 16, train loss: 55.4564323425293, val loss: 29.104389190673828
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:35:22,669 epoch: 17, train loss: 55.32448196411133, val loss: 29.206012725830078
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:35:26,323 epoch: 18, train loss: 54.867088317871094, val loss: 29.1075

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:35:37,556 0 genes in training set are missing from prediction set
2020-11-12 14:35:38,000 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:35:38,058 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_gbm_train_scRNAseq_cesc_val/pollock_module
2020-11-12 14:35:38,059 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Neuron'
 'Oligodendrocytes' 'T cells']
2020-11-12 14:35:38,123 0 genes in training set are missing from prediction set
2020-11-12 14:35:57,521 (8449, 8)
2020-11-12 14:35:57,522 {'Endothelial', 'Malignant', 'T cells', 'Microglia', 'Fibroblast', 'Oligodendrocytes', 'B cell', 'Neuron'}


snRNAseq snATACseq_gbm_train_scRNAseq_hnscc_val (1316, 19891) (10288, 26929)


2020-11-12 14:36:05,604 input dataset shape: (11604, 17615)
2020-11-12 14:36:05,608 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 14:36:05,609 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:36:05,627 train shape: (1316, 17615), val shape: (10288, 17615)
2020-11-12 14:36:05,628 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 14:36:05,629 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:36:11,622 epoch: 1, train loss: 65.45384216308594, val loss: 15.696086883544922
  if not is_categorical(df_full[k]):
  if not is_categorical(df_fu

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:37:00,401 epoch: 13, train loss: 60.053653717041016, val loss: 10.402352333068848
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:37:04,463 epoch: 14, train loss: 59.86148452758789, val loss: 10.4434232711792
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:37:08,466 epoch: 15, train loss: 59.471343994140625, val loss: 10.445959091186523
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:37:12,543 epoch: 16, train loss: 59.097320556640625, val loss: 10.478105545043945
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:37:16,613 epoch: 17, train loss: 58.68993377685547, val loss: 10.44328498840332
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:37:20,565 epoch: 18, train loss: 58.45751953125, val loss: 10.38866

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:37:33,244 0 genes in training set are missing from prediction set
2020-11-12 14:37:33,729 starting batch 1 of 2
  if not is_categorical(df_full[k]):
2020-11-12 14:37:33,785 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_gbm_train_scRNAseq_hnscc_val/pollock_module
2020-11-12 14:37:33,787 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Neuron'
 'Oligodendrocytes' 'T cells']
2020-11-12 14:37:33,965 0 genes in training set are missing from prediction set
2020-11-12 14:37:54,441 (10000, 8)
2020-11-12 14:37:54,442 {'Endothelial', 'Malignant', 'T cells', 'Microglia', 'Fibroblast', 'Oligodendrocytes', 'B cell', 'Neuron'}
2020-11-12 14:37:54,449 starting batch 2 of 2
  if not is_categorical(df_full[k]):
2020-11-12 14:37:54,498 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_gbm_train_scRNAseq_hnscc_va

snRNAseq snATACseq_gbm_train_scRNAseq_melanoma_val (1316, 19891) (6735, 23452)


2020-11-12 14:38:00,514 input dataset shape: (8051, 16275)
2020-11-12 14:38:00,516 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 14:38:00,517 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:38:00,535 train shape: (1316, 16275), val shape: (6735, 16275)
2020-11-12 14:38:00,536 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 14:38:00,537 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:38:05,814 epoch: 1, train loss: 71.6219253540039, val loss: 27.02459716796875
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:38:

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:38:46,812 epoch: 13, train loss: 66.39887237548828, val loss: 22.423128128051758
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:38:50,184 epoch: 14, train loss: 66.10533142089844, val loss: 22.433807373046875
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:38:53,618 epoch: 15, train loss: 65.87523651123047, val loss: 22.46096420288086
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:38:57,037 epoch: 16, train loss: 65.41240692138672, val loss: 22.47298240661621
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:39:00,444 epoch: 17, train loss: 64.89769744873047, val loss: 22.42923355102539
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:39:03,896 epoch: 18, train loss: 65.08782958984375, val loss: 22.50491

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:39:14,150 0 genes in training set are missing from prediction set
2020-11-12 14:39:14,989 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:39:15,028 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_gbm_train_scRNAseq_melanoma_val/pollock_module
2020-11-12 14:39:15,028 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Neuron'
 'Oligodendrocytes' 'T cells']
2020-11-12 14:39:15,053 0 genes in training set are missing from prediction set
2020-11-12 14:39:25,093 (6735, 8)
2020-11-12 14:39:25,094 {'Endothelial', 'Malignant', 'T cells', 'Microglia', 'Fibroblast', 'Oligodendrocytes', 'B cell', 'Neuron'}


snRNAseq snATACseq_gbm_train_scRNAseq_pbmc_val (1316, 19891) (1698, 32738)


2020-11-12 14:39:27,663 input dataset shape: (3014, 18919)
2020-11-12 14:39:27,664 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Megakaryocyte', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 14:39:27,665 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:39:27,676 train shape: (1316, 18919), val shape: (1698, 18919)
2020-11-12 14:39:27,677 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 14:39:27,678 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Megakaryocyte', 'Monocyte', 'NK']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:39:32,034 epoch: 1, train loss: 78.02565002441406, val loss: 18.09144401550293
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:39:35,274 epoch: 2, train loss:

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:40:10,618 epoch: 13, train loss: 71.89328002929688, val loss: 12.134435653686523
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:40:13,765 epoch: 14, train loss: 71.89604187011719, val loss: 12.132942199707031
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:40:16,957 epoch: 15, train loss: 71.16285705566406, val loss: 12.145111083984375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:40:20,123 epoch: 16, train loss: 71.45111083984375, val loss: 12.213960647583008
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:40:23,280 epoch: 17, train loss: 70.42045593261719, val loss: 12.104808807373047
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:40:26,486 epoch: 18, train loss: 70.0088882446289, val loss: 12.132

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:40:35,489 0 genes in training set are missing from prediction set
2020-11-12 14:40:35,996 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:40:36,048 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_gbm_train_scRNAseq_pbmc_val/pollock_module
2020-11-12 14:40:36,049 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Neuron'
 'Oligodendrocytes' 'T cells']
2020-11-12 14:40:36,065 0 genes in training set are missing from prediction set
2020-11-12 14:40:38,727 (1698, 8)
2020-11-12 14:40:38,728 {'Malignant', 'T cells', 'Microglia', 'B cell', 'Oligodendrocytes', 'Fibroblast', 'Neuron'}


snRNAseq snATACseq_gbm_train_scRNAseq_pdac_val (1316, 19891) (15435, 28756)


2020-11-12 14:40:50,364 input dataset shape: (16751, 17904)
2020-11-12 14:40:50,366 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg', 'Tuft']
2020-11-12 14:40:50,367 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:40:50,386 train shape: (1316, 17904), val shape: (15435, 17904)
2020-11-12 14:40:50,388 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 14:40:50,389 val labels: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:40:57,432 epoch: 1, trai

2020-11-12 14:41:45,964 epoch: 12, train loss: 62.12629699707031, val loss: 19.943452835083008
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:41:50,471 epoch: 13, train loss: 61.912010192871094, val loss: 19.93376922607422
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:41:54,772 epoch: 14, train loss: 61.64417266845703, val loss: 19.827178955078125
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:41:59,075 epoch: 15, train loss: 61.1320915222168, val loss: 19.879520416259766
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:42:03,477 epoch: 16, train loss: 60.55359649658203, val loss: 19.814117431640625
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:42:07,785 epoch: 17, train loss: 60.3519401550293, val loss: 19.93807601928711
  if not is_categorical(df_full[k]):
  if not is_categorical(df

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:42:26,995 0 genes in training set are missing from prediction set
2020-11-12 14:42:27,482 starting batch 1 of 2
  if not is_categorical(df_full[k]):
2020-11-12 14:42:27,537 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_gbm_train_scRNAseq_pdac_val/pollock_module
2020-11-12 14:42:27,538 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Neuron'
 'Oligodendrocytes' 'T cells']
2020-11-12 14:42:27,684 0 genes in training set are missing from prediction set
2020-11-12 14:42:47,570 (10000, 8)
2020-11-12 14:42:47,571 {'Malignant', 'T cells', 'Microglia', 'B cell', 'Oligodendrocytes', 'Fibroblast', 'Neuron'}
2020-11-12 14:42:47,579 starting batch 2 of 2
  if not is_categorical(df_full[k]):
2020-11-12 14:42:47,636 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_gbm_train_scRNAseq_pdac_val/pollock_module


snRNAseq snATACseq_brca_train_snRNAseq_brca_val (2064, 19891) (9490, 29175)


2020-11-12 14:43:06,582 input dataset shape: (11554, 17494)
2020-11-12 14:43:06,584 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 14:43:06,585 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:43:06,605 train shape: (2064, 17494), val shape: (9490, 17494)
2020-11-12 14:43:06,606 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
2020-11-12 14:43:06,607 val labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:43:14,320 epoch: 1, train loss: 59.13610076904297, val loss: 25.17626190185547
  if not is_categorical(df_full[k]):
  if not is_categorical(df_f

2020-11-12 14:44:15,795 epoch: 12, train loss: 54.88400650024414, val loss: 24.127269744873047
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:44:21,382 epoch: 13, train loss: 54.73920440673828, val loss: 24.119380950927734
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:44:26,899 epoch: 14, train loss: 54.43754959106445, val loss: 24.117128372192383
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:44:32,504 epoch: 15, train loss: 54.28376007080078, val loss: 24.120342254638672
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:44:38,140 epoch: 16, train loss: 54.15849304199219, val loss: 24.13874053955078
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:44:43,759 epoch: 17, train loss: 54.3734016418457, val loss: 24.123428344726562
  if not is_categorical(df_full[k]):
  if not is_categorical(d

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:45:05,934 0 genes in training set are missing from prediction set
2020-11-12 14:45:06,416 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:45:06,486 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_brca_train_snRNAseq_brca_val/pollock_module
2020-11-12 14:45:06,488 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Fibroblast'
 'Malignant' 'Mast' 'Monocyte' 'NK' 'Treg']
2020-11-12 14:45:06,573 0 genes in training set are missing from prediction set
2020-11-12 14:45:26,414 (9490, 11)
2020-11-12 14:45:26,416 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Malignant', 'Dendritic', 'B cell', 'Fibroblast', 'Mast', 'NK'}


snRNAseq snATACseq_brca_train_snRNAseq_ccrcc_val (2064, 19891) (8605, 33538)


2020-11-12 14:45:34,708 input dataset shape: (10669, 18895)
2020-11-12 14:45:34,710 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 14:45:34,711 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:45:34,735 train shape: (2064, 18895), val shape: (8605, 18895)
2020-11-12 14:45:34,737 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
2020-11-12 14:45:34,737 val labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:45:42,663 epoch: 1, train loss: 61.66411209106445, val loss: 31.34377670288086
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:46:52,650 epoch: 13, train loss: 58.20182418823242, val loss: 30.168880462646484
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:46:58,461 epoch: 14, train loss: 58.120426177978516, val loss: 30.159414291381836
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:47:04,281 epoch: 15, train loss: 57.42182922363281, val loss: 30.121057510375977
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:47:10,132 epoch: 16, train loss: 57.07340621948242, val loss: 30.139448165893555
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:47:15,955 epoch: 17, train loss: 56.87788772583008, val loss: 30.101099014282227
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:47:21,842 epoch: 18, train loss: 57.157711029052734, val loss: 30.

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:47:39,002 0 genes in training set are missing from prediction set
2020-11-12 14:47:39,517 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:47:39,592 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_brca_train_snRNAseq_ccrcc_val/pollock_module
2020-11-12 14:47:39,594 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Fibroblast'
 'Malignant' 'Mast' 'Monocyte' 'NK' 'Treg']
2020-11-12 14:47:39,676 0 genes in training set are missing from prediction set
2020-11-12 14:47:59,543 (8605, 11)
2020-11-12 14:47:59,544 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Malignant', 'Dendritic', 'B cell', 'Mast', 'Fibroblast', 'NK'}


snRNAseq snATACseq_brca_train_snRNAseq_gbm_val (2064, 19891) (6810, 29748)


2020-11-12 14:48:06,031 input dataset shape: (8874, 17613)
2020-11-12 14:48:06,034 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'T cells', 'Treg']
2020-11-12 14:48:06,034 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:48:06,060 train shape: (2064, 17613), val shape: (6810, 17613)
2020-11-12 14:48:06,061 train labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
2020-11-12 14:48:06,062 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:48:13,212 epoch: 1, train loss: 60.01771545410156, val loss: 21.783950805664062
  if not is_categorical(df_full[k]):
  if not is_categorical(df_fu

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:49:16,073 epoch: 13, train loss: 56.9305305480957, val loss: 20.71633529663086
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:49:21,298 epoch: 14, train loss: 56.7541618347168, val loss: 20.71527099609375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:49:26,538 epoch: 15, train loss: 55.90329360961914, val loss: 20.709728240966797
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:49:31,803 epoch: 16, train loss: 55.878501892089844, val loss: 20.705970764160156
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:49:37,013 epoch: 17, train loss: 55.46759796142578, val loss: 20.680482864379883
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:49:42,254 epoch: 18, train loss: 55.467533111572266, val loss: 20.6983

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:49:57,761 0 genes in training set are missing from prediction set
2020-11-12 14:49:58,253 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:49:58,336 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_brca_train_snRNAseq_gbm_val/pollock_module
2020-11-12 14:49:58,338 ['B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Fibroblast'
 'Malignant' 'Mast' 'Monocyte' 'NK' 'Treg']
2020-11-12 14:49:58,409 0 genes in training set are missing from prediction set
2020-11-12 14:50:12,981 (6810, 11)
2020-11-12 14:50:12,982 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Malignant', 'Dendritic', 'B cell', 'Fibroblast', 'Mast', 'NK'}


snRNAseq snATACseq_gbm_train_snRNAseq_brca_val (1316, 19891) (9490, 29175)


2020-11-12 14:50:20,619 input dataset shape: (10806, 17494)
2020-11-12 14:50:20,622 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 14:50:20,623 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:50:20,642 train shape: (1316, 17494), val shape: (9490, 17494)
2020-11-12 14:50:20,643 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 14:50:20,644 val labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:50:26,639 epoch: 1, train loss: 68.5948257446289, val loss: 30.475549697875977
  if not is_categorical(df_full[

2020-11-12 14:51:10,302 epoch: 12, train loss: 63.65363311767578, val loss: 27.262895584106445
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:51:14,244 epoch: 13, train loss: 63.86856460571289, val loss: 27.24945640563965
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:51:18,234 epoch: 14, train loss: 63.04463195800781, val loss: 27.226762771606445
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:51:22,295 epoch: 15, train loss: 63.060115814208984, val loss: 27.254974365234375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:51:26,258 epoch: 16, train loss: 62.62888717651367, val loss: 27.211715698242188
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:51:30,206 epoch: 17, train loss: 62.14778518676758, val loss: 27.187604904174805
  if not is_categorical(df_full[k]):
  if not is_categorical

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:51:46,539 0 genes in training set are missing from prediction set
2020-11-12 14:51:47,019 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:51:47,077 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_gbm_train_snRNAseq_brca_val/pollock_module
2020-11-12 14:51:47,078 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Neuron'
 'Oligodendrocytes' 'T cells']
2020-11-12 14:51:47,163 0 genes in training set are missing from prediction set
2020-11-12 14:52:06,998 (9490, 8)
2020-11-12 14:52:07,000 {'Endothelial', 'Malignant', 'T cells', 'Microglia', 'Fibroblast', 'Oligodendrocytes', 'B cell', 'Neuron'}


snRNAseq snATACseq_gbm_train_snRNAseq_ccrcc_val (1316, 19891) (8605, 33538)


2020-11-12 14:52:14,629 input dataset shape: (9921, 18895)
2020-11-12 14:52:14,631 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 14:52:14,632 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:52:14,648 train shape: (1316, 18895), val shape: (8605, 18895)
2020-11-12 14:52:14,650 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 14:52:14,651 val labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:52:20,806 epoch: 1, train loss: 72.8953857421875, val loss: 36.95677185058594
  if not is_categorical(df_full[k]):
  if not is_categoric

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:53:10,359 epoch: 13, train loss: 68.02352142333984, val loss: 34.03582763671875
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:53:14,529 epoch: 14, train loss: 67.27422332763672, val loss: 34.03211212158203
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:53:18,683 epoch: 15, train loss: 67.29920959472656, val loss: 34.0352783203125
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:53:22,833 epoch: 16, train loss: 66.48639678955078, val loss: 33.952064514160156
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:53:27,028 epoch: 17, train loss: 66.28021240234375, val loss: 33.94629669189453
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:53:31,152 epoch: 18, train loss: 65.89237976074219, val loss: 33.9243240

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:53:43,905 0 genes in training set are missing from prediction set
2020-11-12 14:53:44,414 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:53:44,477 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_gbm_train_snRNAseq_ccrcc_val/pollock_module
2020-11-12 14:53:44,478 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Neuron'
 'Oligodendrocytes' 'T cells']
2020-11-12 14:53:44,561 0 genes in training set are missing from prediction set
2020-11-12 14:54:03,777 (8605, 8)
2020-11-12 14:54:03,778 {'Endothelial', 'T cells', 'Malignant', 'Microglia', 'Fibroblast', 'Oligodendrocytes', 'B cell', 'Neuron'}


snRNAseq snATACseq_gbm_train_snRNAseq_gbm_val (1316, 19891) (6810, 29748)


2020-11-12 14:54:09,940 input dataset shape: (8126, 17613)
2020-11-12 14:54:09,943 possible cell types: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 14:54:09,944 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:54:09,962 train shape: (1316, 17613), val shape: (6810, 17613)
2020-11-12 14:54:09,964 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 14:54:09,965 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:54:15,705 epoch: 1, train loss: 71.927490234375, val loss: 27.228487014770508
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:54:19,558 epoch: 2, train loss: 69.41294860839844, val loss: 24.

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:55:01,475 epoch: 13, train loss: 65.81718444824219, val loss: 23.55942153930664
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:55:05,284 epoch: 14, train loss: 66.19630432128906, val loss: 23.535751342773438
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:55:09,092 epoch: 15, train loss: 65.50821685791016, val loss: 23.5170955657959
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:55:12,875 epoch: 16, train loss: 64.91056823730469, val loss: 23.494922637939453
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:55:16,691 epoch: 17, train loss: 65.05804443359375, val loss: 23.53298568725586
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:55:20,496 epoch: 18, train loss: 64.88488006591797, val loss: 23.495212

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:55:32,097 0 genes in training set are missing from prediction set
2020-11-12 14:55:32,594 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 14:55:32,662 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snATACseq_gbm_train_snRNAseq_gbm_val/pollock_module
2020-11-12 14:55:32,663 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Neuron'
 'Oligodendrocytes' 'T cells']
2020-11-12 14:55:32,729 0 genes in training set are missing from prediction set
2020-11-12 14:55:47,360 (6810, 8)
2020-11-12 14:55:47,361 {'Endothelial', 'Malignant', 'T cells', 'Microglia', 'Fibroblast', 'Oligodendrocytes', 'B cell', 'Neuron'}


snRNAseq snRNAseq_brca_train_scRNAseq_brca_val (2455, 29175) (11253, 27131)


2020-11-12 14:56:00,398 input dataset shape: (13708, 25674)
2020-11-12 14:56:00,400 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 14:56:00,401 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:56:00,412 train shape: (2455, 25674), val shape: (11253, 25674)
2020-11-12 14:56:00,414 train labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 14:56:00,414 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:56:11,747 epoch: 1, train loss: 50.44792175292969, val loss: 37.99460220336914
  if not is_categorical(

2020-11-12 14:57:44,050 epoch: 12, train loss: 46.670650482177734, val loss: 35.15503692626953
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:57:52,524 epoch: 13, train loss: 46.41646957397461, val loss: 35.10862731933594
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:58:00,709 epoch: 14, train loss: 46.33790588378906, val loss: 34.99468994140625
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:58:09,029 epoch: 15, train loss: 46.135101318359375, val loss: 35.111610412597656
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:58:17,381 epoch: 16, train loss: 45.93547439575195, val loss: 35.119972229003906
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:58:25,548 epoch: 17, train loss: 45.76445770263672, val loss: 35.196163177490234
  if not is_categorical(df_full[k]):
  if not is_categorical(

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 14:58:57,895 0 genes in training set are missing from prediction set
2020-11-12 14:58:58,394 starting batch 1 of 2
  if not is_categorical(df_full[k]):
2020-11-12 14:58:58,464 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_brca_train_scRNAseq_brca_val/pollock_module
2020-11-12 14:58:58,465 ['Adipocyte' 'B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 14:58:58,564 0 genes in training set are missing from prediction set
2020-11-12 14:59:21,930 (10000, 13)
2020-11-12 14:59:21,931 {'Monocyte', 'Endothelial', 'Treg', 'CD8 T cell', 'Malignant', 'Dendritic', 'B cell', 'Fibroblast', 'Mast', 'Plasma', 'NK'}
2020-11-12 14:59:21,937 starting batch 2 of 2
  if not is_categorical(df_full[k]):
2020-11-12 14:59:22,002 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross

snRNAseq snRNAseq_brca_train_scRNAseq_cesc_val (2455, 29175) (8449, 22928)


2020-11-12 14:59:34,747 input dataset shape: (10904, 22001)
2020-11-12 14:59:34,749 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 14:59:34,750 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 14:59:34,760 train shape: (2455, 22001), val shape: (8449, 22001)
2020-11-12 14:59:34,763 train labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 14:59:34,763 val labels: ['CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 14:59:44,439 epoch: 1, train loss: 40.34833526611328, val loss: 41.94597625732422
  if not is_categorical(df_f

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:01:11,469 epoch: 13, train loss: 37.022064208984375, val loss: 39.450523376464844
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:01:18,823 epoch: 14, train loss: 36.92179489135742, val loss: 39.345916748046875
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:01:26,058 epoch: 15, train loss: 36.77168655395508, val loss: 39.20825958251953
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:01:33,244 epoch: 16, train loss: 36.61147689819336, val loss: 39.18872833251953
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:01:40,384 epoch: 17, train loss: 36.49347686767578, val loss: 39.129634857177734
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:01:47,714 epoch: 18, train loss: 36.4083251953125, val loss: 39.3130

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:02:08,389 0 genes in training set are missing from prediction set
2020-11-12 15:02:08,918 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 15:02:08,983 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_brca_train_scRNAseq_cesc_val/pollock_module
2020-11-12 15:02:08,984 ['Adipocyte' 'B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 15:02:09,066 0 genes in training set are missing from prediction set
2020-11-12 15:02:32,322 (8449, 13)
2020-11-12 15:02:32,323 {'Monocyte', 'Endothelial', 'Treg', 'Adipocyte', 'CD8 T cell', 'Dendritic', 'Malignant', 'B cell', 'Fibroblast', 'Mast', 'Plasma', 'NK'}


snRNAseq snRNAseq_brca_train_scRNAseq_hnscc_val (2455, 29175) (10288, 26929)


2020-11-12 15:02:44,501 input dataset shape: (12743, 25299)
2020-11-12 15:02:44,504 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:02:44,504 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:02:44,528 train shape: (2455, 25299), val shape: (10288, 25299)
2020-11-12 15:02:44,530 train labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:02:44,531 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:02:55,701 epoch: 1, train loss: 43.82136917114258, val loss: 17.727659225463867
  if not is_categorical(df_full[k]):
  if not is_c

2020-11-12 15:04:27,783 epoch: 12, train loss: 40.3642578125, val loss: 14.81712532043457
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:04:36,278 epoch: 13, train loss: 40.168758392333984, val loss: 14.749866485595703
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:04:44,658 epoch: 14, train loss: 40.06048583984375, val loss: 14.753308296203613
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:04:53,118 epoch: 15, train loss: 39.88812255859375, val loss: 14.686659812927246
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:05:01,504 epoch: 16, train loss: 39.769866943359375, val loss: 14.697813987731934
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:05:09,790 epoch: 17, train loss: 39.58357620239258, val loss: 14.675407409667969
  if not is_categorical(df_full[k]):
  if not is_categorical(df

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:05:42,119 0 genes in training set are missing from prediction set
2020-11-12 15:05:42,747 starting batch 1 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:05:42,815 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_brca_train_scRNAseq_hnscc_val/pollock_module
2020-11-12 15:05:42,816 ['Adipocyte' 'B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 15:05:42,997 0 genes in training set are missing from prediction set
2020-11-12 15:06:08,850 (10000, 13)
2020-11-12 15:06:08,851 {'Monocyte', 'Endothelial', 'Treg', 'Adipocyte', 'CD8 T cell', 'Dendritic', 'Malignant', 'B cell', 'Mast', 'Fibroblast', 'Plasma', 'NK'}
2020-11-12 15:06:08,857 starting batch 2 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:06:08,921 /home/estorrs/pollock/benchmarking/results/102720

snRNAseq snRNAseq_brca_train_scRNAseq_melanoma_val (2455, 29175) (6735, 23452)


2020-11-12 15:06:17,070 input dataset shape: (9190, 21018)
2020-11-12 15:06:17,072 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:06:17,073 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:06:17,089 train shape: (2455, 21018), val shape: (6735, 21018)
2020-11-12 15:06:17,091 train labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:06:17,091 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:06:25,825 epoch: 1, train loss: 52.0567626953125, val loss: 35.44812774658203
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-1

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:07:45,357 epoch: 13, train loss: 48.03999328613281, val loss: 32.26381301879883
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:07:51,967 epoch: 14, train loss: 47.7831916809082, val loss: 32.33263397216797
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:07:58,585 epoch: 15, train loss: 47.61672592163086, val loss: 32.42116165161133
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:08:05,240 epoch: 16, train loss: 47.47327423095703, val loss: 32.4876708984375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:08:11,851 epoch: 17, train loss: 47.27165603637695, val loss: 32.50497817993164
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:08:18,452 epoch: 18, train loss: 47.10639190673828, val loss: 32.837890625

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:08:37,520 0 genes in training set are missing from prediction set
2020-11-12 15:08:38,048 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 15:08:38,116 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_brca_train_scRNAseq_melanoma_val/pollock_module
2020-11-12 15:08:38,117 ['Adipocyte' 'B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 15:08:38,152 0 genes in training set are missing from prediction set
2020-11-12 15:08:50,140 (6735, 13)
2020-11-12 15:08:50,141 {'Monocyte', 'B cell', 'Endothelial', 'Plasma'}


snRNAseq snRNAseq_brca_train_scRNAseq_pbmc_val (2455, 29175) (1698, 32738)


2020-11-12 15:08:53,387 input dataset shape: (4153, 18731)
2020-11-12 15:08:53,389 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:08:53,390 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:08:53,398 train shape: (2455, 18731), val shape: (1698, 18731)
2020-11-12 15:08:53,399 train labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:08:53,400 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Megakaryocyte', 'Monocyte', 'NK']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:09:00,276 epoch: 1, train loss: 47.31116485595703, val loss: 21.601295471191406
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:0

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:10:06,206 epoch: 13, train loss: 43.590728759765625, val loss: 18.097278594970703
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:10:11,659 epoch: 14, train loss: 43.34333038330078, val loss: 18.0936336517334
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:10:17,117 epoch: 15, train loss: 43.20151138305664, val loss: 18.12579345703125
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:10:22,531 epoch: 16, train loss: 43.09046173095703, val loss: 18.236942291259766
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:10:28,040 epoch: 17, train loss: 42.82646560668945, val loss: 18.146190643310547
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:10:33,565 epoch: 18, train loss: 42.71232986450195, val loss: 18.2421

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:10:48,260 0 genes in training set are missing from prediction set
2020-11-12 15:10:48,771 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 15:10:48,834 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_brca_train_scRNAseq_pbmc_val/pollock_module
2020-11-12 15:10:48,836 ['Adipocyte' 'B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 15:10:48,850 0 genes in training set are missing from prediction set
2020-11-12 15:10:51,474 (1698, 13)
2020-11-12 15:10:51,475 {'Monocyte', 'Endothelial', 'Plasma'}


snRNAseq snRNAseq_brca_train_scRNAseq_pdac_val (2455, 29175) (15435, 28756)


2020-11-12 15:11:09,168 input dataset shape: (17890, 26783)
2020-11-12 15:11:09,171 possible cell types: ['Acinar', 'Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-12 15:11:09,172 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:11:09,187 train shape: (2455, 26783), val shape: (15435, 26783)
2020-11-12 15:11:09,190 train labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:11:09,191 val labels: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:11:23,276 epoch: 1, t

2020-11-12 15:13:10,420 epoch: 12, train loss: 41.35862350463867, val loss: 26.038307189941406
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:13:19,905 epoch: 13, train loss: 41.11583709716797, val loss: 25.933931350708008
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:13:29,674 epoch: 14, train loss: 40.99240493774414, val loss: 25.775583267211914
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:13:39,424 epoch: 15, train loss: 40.86333465576172, val loss: 25.809925079345703
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:13:49,225 epoch: 16, train loss: 40.6617431640625, val loss: 25.865074157714844
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:13:58,988 epoch: 17, train loss: 40.5009651184082, val loss: 25.877796173095703
  if not is_categorical(df_full[k]):
  if not is_categorical(d

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:14:37,137 0 genes in training set are missing from prediction set
2020-11-12 15:14:37,786 starting batch 1 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:14:37,837 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_brca_train_scRNAseq_pdac_val/pollock_module
2020-11-12 15:14:37,839 ['Adipocyte' 'B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 15:14:37,928 0 genes in training set are missing from prediction set
2020-11-12 15:15:02,870 (10000, 13)
2020-11-12 15:15:02,871 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'B cell', 'Mast', 'Fibroblast', 'Plasma', 'NK'}
2020-11-12 15:15:02,877 starting batch 2 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:15:02,944 /home/estorrs/pollock/benchmarking/results/102720

snRNAseq snRNAseq_ccrcc_train_scRNAseq_brca_val (2113, 33538) (11253, 27131)


2020-11-12 15:15:29,260 input dataset shape: (13366, 27131)
2020-11-12 15:15:29,262 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:15:29,263 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:15:29,287 train shape: (2113, 27131), val shape: (11253, 27131)
2020-11-12 15:15:29,289 train labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:15:29,290 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:15:40,436 epoch: 1, train loss: 47.18162536621094, val loss: 39.08136749267578
  if not is_categorical(df_full[k]):
  i

2020-11-12 15:17:09,301 epoch: 12, train loss: 42.10171890258789, val loss: 37.939056396484375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:17:17,312 epoch: 13, train loss: 42.91823959350586, val loss: 37.88962936401367
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:17:25,360 epoch: 14, train loss: 42.63137435913086, val loss: 37.814918518066406
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:17:33,422 epoch: 15, train loss: 41.9699821472168, val loss: 37.823646545410156
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:17:41,488 epoch: 16, train loss: 42.87195587158203, val loss: 37.845733642578125
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:17:49,345 epoch: 17, train loss: 41.381282806396484, val loss: 37.8559455871582
  if not is_categorical(df_full[k]):
  if not is_categorical(df

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:18:20,785 0 genes in training set are missing from prediction set
2020-11-12 15:18:21,427 starting batch 1 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:18:21,490 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_ccrcc_train_scRNAseq_brca_val/pollock_module
2020-11-12 15:18:21,491 ['CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Epithelial'
 'Fibroblast' 'Malignant' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 15:18:21,661 0 genes in training set are missing from prediction set
2020-11-12 15:18:45,796 (10000, 11)
2020-11-12 15:18:45,797 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'Fibroblast', 'Plasma', 'Epithelial', 'NK'}
2020-11-12 15:18:45,805 starting batch 2 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:18:45,864 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_dat

snRNAseq snRNAseq_ccrcc_train_scRNAseq_cesc_val (2113, 33538) (8449, 22928)


2020-11-12 15:18:58,616 input dataset shape: (10562, 22919)
2020-11-12 15:18:58,618 possible cell types: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:18:58,619 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:18:58,633 train shape: (2113, 22919), val shape: (8449, 22919)
2020-11-12 15:18:58,635 train labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:18:58,635 val labels: ['CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:19:07,656 epoch: 1, train loss: 36.38362503051758, val loss: 42.309654235839844
  if not is_categorical(df_full[k]):
  if not is_categorical(df_ful

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:20:27,994 epoch: 13, train loss: 32.68611526489258, val loss: 40.97669982910156
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:20:34,751 epoch: 14, train loss: 33.729705810546875, val loss: 41.10688781738281
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:20:41,531 epoch: 15, train loss: 32.910037994384766, val loss: 40.93604278564453
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:20:48,261 epoch: 16, train loss: 33.243343353271484, val loss: 40.967247009277344
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:20:54,901 epoch: 17, train loss: 33.94630432128906, val loss: 40.85228729248047
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:21:01,649 epoch: 18, train loss: 32.32436752319336, val loss: 40.878

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:21:21,090 0 genes in training set are missing from prediction set
2020-11-12 15:21:21,635 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 15:21:21,695 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_ccrcc_train_scRNAseq_cesc_val/pollock_module
2020-11-12 15:21:21,696 ['CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Epithelial'
 'Fibroblast' 'Malignant' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 15:21:21,780 0 genes in training set are missing from prediction set
2020-11-12 15:21:45,975 (8449, 11)
2020-11-12 15:21:45,976 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'Fibroblast', 'Plasma', 'Epithelial', 'NK'}


snRNAseq snRNAseq_ccrcc_train_scRNAseq_hnscc_val (2113, 33538) (10288, 26929)


2020-11-12 15:21:58,461 input dataset shape: (12401, 26918)
2020-11-12 15:21:58,464 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:21:58,464 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:21:58,482 train shape: (2113, 26918), val shape: (10288, 26918)
2020-11-12 15:21:58,485 train labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:21:58,485 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:22:08,916 epoch: 1, train loss: 40.21179962158203, val loss: 16.650588989257812
  if not is_categorical(df_full[k]):
  if not is_categorical(df_fu

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:23:41,224 epoch: 13, train loss: 36.97315216064453, val loss: 15.140778541564941
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:23:48,922 epoch: 14, train loss: 36.90961456298828, val loss: 15.179096221923828
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:23:56,512 epoch: 15, train loss: 36.65604782104492, val loss: 15.178590774536133
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:24:04,195 epoch: 16, train loss: 35.754249572753906, val loss: 15.18223762512207
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:24:11,836 epoch: 17, train loss: 35.504638671875, val loss: 15.188850402832031
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:24:19,523 epoch: 18, train loss: 37.080230712890625, val loss: 15.146

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:24:41,950 0 genes in training set are missing from prediction set
2020-11-12 15:24:42,594 starting batch 1 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:24:42,638 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_ccrcc_train_scRNAseq_hnscc_val/pollock_module
2020-11-12 15:24:42,639 ['CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Epithelial'
 'Fibroblast' 'Malignant' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 15:24:42,767 0 genes in training set are missing from prediction set
2020-11-12 15:25:09,947 (10000, 11)
2020-11-12 15:25:09,948 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'Fibroblast', 'Plasma', 'Epithelial', 'NK'}
2020-11-12 15:25:09,955 starting batch 2 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:25:10,013 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_da

snRNAseq snRNAseq_ccrcc_train_scRNAseq_melanoma_val (2113, 33538) (6735, 23452)


2020-11-12 15:25:18,317 input dataset shape: (8848, 21975)
2020-11-12 15:25:18,319 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:25:18,320 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:25:18,337 train shape: (2113, 21975), val shape: (6735, 21975)
2020-11-12 15:25:18,338 train labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:25:18,339 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:25:26,923 epoch: 1, train loss: 50.812705993652344, val loss: 36.46693420410156
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:25:33,248 epoc

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:26:42,008 epoch: 13, train loss: 44.08779525756836, val loss: 35.01332092285156
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:26:48,309 epoch: 14, train loss: 44.06965637207031, val loss: 35.029293060302734
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:26:54,589 epoch: 15, train loss: 45.77122116088867, val loss: 34.97929382324219
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:27:00,822 epoch: 16, train loss: 43.79228591918945, val loss: 34.9914436340332
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:27:07,027 epoch: 17, train loss: 45.320377349853516, val loss: 34.996192932128906
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:27:13,294 epoch: 18, train loss: 43.447200775146484, val loss: 34.9494

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:27:30,926 0 genes in training set are missing from prediction set
2020-11-12 15:27:31,476 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 15:27:32,136 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_ccrcc_train_scRNAseq_melanoma_val/pollock_module
2020-11-12 15:27:32,137 ['CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Epithelial'
 'Fibroblast' 'Malignant' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 15:27:32,167 0 genes in training set are missing from prediction set
2020-11-12 15:27:44,365 (6735, 11)
2020-11-12 15:27:44,366 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'Fibroblast', 'Plasma', 'Epithelial', 'NK'}


snRNAseq snRNAseq_ccrcc_train_scRNAseq_pbmc_val (2113, 33538) (1698, 32738)


2020-11-12 15:27:47,614 input dataset shape: (3811, 20453)
2020-11-12 15:27:47,616 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:27:47,617 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:27:47,625 train shape: (2113, 20453), val shape: (1698, 20453)
2020-11-12 15:27:47,627 train labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:27:47,627 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Megakaryocyte', 'Monocyte', 'NK']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:27:54,173 epoch: 1, train loss: 47.665950775146484, val loss: 22.7835636138916
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:27:59,405 epoch: 2, train 

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:28:56,834 epoch: 13, train loss: 42.269927978515625, val loss: 21.028873443603516
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:29:02,021 epoch: 14, train loss: 43.53316116333008, val loss: 21.012941360473633
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:29:07,206 epoch: 15, train loss: 41.174285888671875, val loss: 20.99415397644043
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:29:12,414 epoch: 16, train loss: 41.982181549072266, val loss: 21.00951385498047
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:29:17,638 epoch: 17, train loss: 43.024070739746094, val loss: 21.023975372314453
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:29:22,832 epoch: 18, train loss: 40.74864196777344, val loss: 21.

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:29:36,981 0 genes in training set are missing from prediction set
2020-11-12 15:29:37,376 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 15:29:37,428 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_ccrcc_train_scRNAseq_pbmc_val/pollock_module
2020-11-12 15:29:37,429 ['CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Epithelial'
 'Fibroblast' 'Malignant' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 15:29:37,445 0 genes in training set are missing from prediction set
2020-11-12 15:29:40,225 (1698, 11)
2020-11-12 15:29:40,226 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'Fibroblast', 'Plasma', 'Epithelial', 'NK'}


snRNAseq snRNAseq_ccrcc_train_scRNAseq_pdac_val (2113, 33538) (15435, 28756)


2020-11-12 15:29:57,977 input dataset shape: (17548, 28756)
2020-11-12 15:29:57,980 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
2020-11-12 15:29:57,981 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:29:58,002 train shape: (2113, 28756), val shape: (15435, 28756)
2020-11-12 15:29:58,004 train labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:29:58,005 val labels: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:30:10,363 epoch: 1, train loss: 40.1816291809082, v

2020-11-12 15:31:44,008 epoch: 12, train loss: 38.32375717163086, val loss: 26.67319107055664
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:31:52,578 epoch: 13, train loss: 37.11663055419922, val loss: 26.664531707763672
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:32:01,119 epoch: 14, train loss: 36.41759490966797, val loss: 26.580360412597656
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:32:09,485 epoch: 15, train loss: 36.78709030151367, val loss: 26.587772369384766
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:32:17,817 epoch: 16, train loss: 36.19842529296875, val loss: 26.576156616210938
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:32:26,099 epoch: 17, train loss: 36.70697784423828, val loss: 26.645679473876953
  if not is_categorical(df_full[k]):
  if not is_categorical(

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:33:00,440 0 genes in training set are missing from prediction set
2020-11-12 15:33:01,110 starting batch 1 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:33:01,170 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_ccrcc_train_scRNAseq_pdac_val/pollock_module
2020-11-12 15:33:01,171 ['CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Epithelial'
 'Fibroblast' 'Malignant' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 15:33:01,300 0 genes in training set are missing from prediction set
2020-11-12 15:33:27,213 (10000, 11)
2020-11-12 15:33:27,214 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'Fibroblast', 'Plasma', 'Epithelial', 'NK'}
2020-11-12 15:33:27,221 starting batch 2 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:33:27,279 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_dat

snRNAseq snRNAseq_gbm_train_scRNAseq_brca_val (1689, 29748) (11253, 27131)


2020-11-12 15:33:54,004 input dataset shape: (12942, 25705)
2020-11-12 15:33:54,006 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 15:33:54,007 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:33:54,032 train shape: (1689, 25705), val shape: (11253, 25705)
2020-11-12 15:33:54,034 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 15:33:54,035 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:34:03,425 epoch: 1, train loss: 57.92527389526367, val loss: 41.62828063964844
  if not is_cat

2020-11-12 15:35:14,138 epoch: 12, train loss: 52.34850311279297, val loss: 38.23450469970703
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:35:20,667 epoch: 13, train loss: 52.123043060302734, val loss: 38.218780517578125
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:35:26,820 epoch: 14, train loss: 51.884246826171875, val loss: 38.260738372802734
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:35:33,337 epoch: 15, train loss: 51.782806396484375, val loss: 38.224853515625
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:35:39,763 epoch: 16, train loss: 51.390403747558594, val loss: 38.25080871582031
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:35:46,234 epoch: 17, train loss: 51.13309097290039, val loss: 38.17744445800781
  if not is_categorical(df_full[k]):
  if not is_categorical(d

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:36:12,062 0 genes in training set are missing from prediction set
2020-11-12 15:36:12,679 starting batch 1 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:36:12,736 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_gbm_train_scRNAseq_brca_val/pollock_module
2020-11-12 15:36:12,737 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Monocyte'
 'Neuron' 'Oligodendrocytes' 'T cells']
2020-11-12 15:36:12,905 0 genes in training set are missing from prediction set
2020-11-12 15:36:36,195 (10000, 9)
2020-11-12 15:36:36,196 {'Monocyte', 'Endothelial', 'T cells', 'B cell', 'Fibroblast', 'Microglia'}
2020-11-12 15:36:36,203 starting batch 2 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:36:36,255 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_gbm_train_scRNAseq_brca_val/pollock_module
2020-11

snRNAseq snRNAseq_gbm_train_scRNAseq_cesc_val (1689, 29748) (8449, 22928)


2020-11-12 15:36:48,455 input dataset shape: (10138, 21981)
2020-11-12 15:36:48,459 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells']
2020-11-12 15:36:48,460 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:36:48,481 train shape: (1689, 21981), val shape: (8449, 21981)
2020-11-12 15:36:48,482 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 15:36:48,483 val labels: ['CD4 T cell', 'CD8 T cell', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:36:56,179 epoch: 1, train loss: 44.99114227294922, val loss: 43.47978973388672
  if not is_categorical(df_full[k]):
  i

2020-11-12 15:37:55,148 epoch: 12, train loss: 40.55335998535156, val loss: 41.062278747558594
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:38:00,570 epoch: 13, train loss: 40.463653564453125, val loss: 40.89613342285156
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:38:06,006 epoch: 14, train loss: 40.134456634521484, val loss: 40.93048095703125
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:38:11,441 epoch: 15, train loss: 39.94361877441406, val loss: 41.17852020263672
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:38:16,855 epoch: 16, train loss: 39.91096115112305, val loss: 41.40275192260742
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:38:22,214 epoch: 17, train loss: 39.63505172729492, val loss: 41.5650749206543
  if not is_categorical(df_full[k]):
  if not is_categorical(df_

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:38:43,749 0 genes in training set are missing from prediction set
2020-11-12 15:38:44,271 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 15:38:44,324 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_gbm_train_scRNAseq_cesc_val/pollock_module
2020-11-12 15:38:44,325 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Monocyte'
 'Neuron' 'Oligodendrocytes' 'T cells']
2020-11-12 15:38:44,405 0 genes in training set are missing from prediction set
2020-11-12 15:39:07,494 (8449, 9)
2020-11-12 15:39:07,495 {'Monocyte', 'Endothelial', 'T cells', 'Malignant', 'Microglia', 'Fibroblast', 'B cell', 'Neuron'}


snRNAseq snRNAseq_gbm_train_scRNAseq_hnscc_val (1689, 29748) (10288, 26929)


2020-11-12 15:39:19,146 input dataset shape: (11977, 25389)
2020-11-12 15:39:19,148 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 15:39:19,149 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:39:19,179 train shape: (1689, 25389), val shape: (10288, 25389)
2020-11-12 15:39:19,181 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 15:39:19,182 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Endothelial', 'Erythrocyte', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:39:28,064 epoch: 1, train loss: 49.499755859375, val loss: 19.631372451782227
  if not is_categorical(df_full[k]):
  if not is_categor

2020-11-12 15:40:35,021 epoch: 12, train loss: 44.313289642333984, val loss: 15.865713119506836
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:40:41,401 epoch: 13, train loss: 44.21080017089844, val loss: 15.844925880432129
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:40:47,521 epoch: 14, train loss: 43.932472229003906, val loss: 15.85806655883789
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:40:53,678 epoch: 15, train loss: 43.77010726928711, val loss: 15.875911712646484
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:40:59,889 epoch: 16, train loss: 43.57040786743164, val loss: 15.856382369995117
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:41:06,086 epoch: 17, train loss: 43.42154312133789, val loss: 15.847105026245117
  if not is_categorical(df_full[k]):
  if not is_categorica

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:41:31,092 0 genes in training set are missing from prediction set
2020-11-12 15:41:31,725 starting batch 1 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:41:31,779 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_gbm_train_scRNAseq_hnscc_val/pollock_module
2020-11-12 15:41:31,780 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Monocyte'
 'Neuron' 'Oligodendrocytes' 'T cells']
2020-11-12 15:41:31,960 0 genes in training set are missing from prediction set
2020-11-12 15:41:57,868 (10000, 9)
2020-11-12 15:41:57,869 {'Monocyte', 'Endothelial', 'T cells', 'Malignant', 'B cell', 'Microglia', 'Oligodendrocytes', 'Fibroblast', 'Neuron'}
2020-11-12 15:41:57,875 starting batch 2 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:41:57,924 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_gbm_tr

snRNAseq snRNAseq_gbm_train_scRNAseq_melanoma_val (1689, 29748) (6735, 23452)


2020-11-12 15:42:05,555 input dataset shape: (8424, 21069)
2020-11-12 15:42:05,557 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 15:42:05,558 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:42:05,577 train shape: (1689, 21069), val shape: (6735, 21069)
2020-11-12 15:42:05,578 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 15:42:05,579 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:42:13,314 epoch: 1, train loss: 60.90229415893555, val loss: 39.12193298339844
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:43:14,261 epoch: 13, train loss: 54.81707000732422, val loss: 34.85912322998047
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:43:19,189 epoch: 14, train loss: 54.74247360229492, val loss: 34.87010955810547
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:43:24,271 epoch: 15, train loss: 54.49437713623047, val loss: 34.928382873535156
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:43:29,265 epoch: 16, train loss: 54.21603012084961, val loss: 34.962677001953125
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:43:34,226 epoch: 17, train loss: 54.022151947021484, val loss: 34.95140838623047
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:43:39,315 epoch: 18, train loss: 53.66630172729492, val loss: 35.0329

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:43:53,839 0 genes in training set are missing from prediction set
2020-11-12 15:43:54,195 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 15:43:54,249 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_gbm_train_scRNAseq_melanoma_val/pollock_module
2020-11-12 15:43:54,250 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Monocyte'
 'Neuron' 'Oligodendrocytes' 'T cells']
2020-11-12 15:43:54,274 0 genes in training set are missing from prediction set
2020-11-12 15:44:06,246 (6735, 9)
2020-11-12 15:44:06,247 {'Monocyte', 'Fibroblast', 'B cell'}


snRNAseq snRNAseq_gbm_train_scRNAseq_pbmc_val (1689, 29748) (1698, 32738)


2020-11-12 15:44:08,867 input dataset shape: (3387, 18949)
2020-11-12 15:44:08,869 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Megakaryocyte', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 15:44:08,870 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:44:08,880 train shape: (1689, 18949), val shape: (1698, 18949)
2020-11-12 15:44:08,881 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 15:44:08,882 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Megakaryocyte', 'Monocyte', 'NK']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:44:14,024 epoch: 1, train loss: 57.386138916015625, val loss: 25.00472640991211
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:44:18,054 epoch: 2

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:45:02,050 epoch: 13, train loss: 51.667510986328125, val loss: 20.193382263183594
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:45:05,971 epoch: 14, train loss: 51.403839111328125, val loss: 20.07647132873535
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:45:09,966 epoch: 15, train loss: 51.16676712036133, val loss: 20.091588973999023
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:45:13,940 epoch: 16, train loss: 51.02677917480469, val loss: 20.05445098876953
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:45:17,924 epoch: 17, train loss: 50.75649642944336, val loss: 20.11622428894043
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:45:21,884 epoch: 18, train loss: 50.482147216796875, val loss: 20.15

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:45:32,863 0 genes in training set are missing from prediction set
2020-11-12 15:45:33,304 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 15:45:33,353 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_gbm_train_scRNAseq_pbmc_val/pollock_module
2020-11-12 15:45:33,354 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Monocyte'
 'Neuron' 'Oligodendrocytes' 'T cells']
2020-11-12 15:45:33,369 0 genes in training set are missing from prediction set
2020-11-12 15:45:36,005 (1698, 9)
2020-11-12 15:45:36,006 {'Monocyte', 'T cells', 'Microglia', 'Fibroblast', 'B cell', 'Neuron'}


snRNAseq snRNAseq_gbm_train_scRNAseq_pdac_val (1689, 29748) (15435, 28756)


2020-11-12 15:45:52,817 input dataset shape: (17124, 27015)
2020-11-12 15:45:52,819 possible cell types: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg', 'Tuft']
2020-11-12 15:45:52,820 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:45:52,846 train shape: (1689, 27015), val shape: (15435, 27015)
2020-11-12 15:45:52,848 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 15:45:52,849 val labels: ['Acinar', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Erythrocyte', 'Fibroblast', 'Islet', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg', 'Tuft']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:46:03,741 ep

2020-11-12 15:47:21,302 epoch: 12, train loss: 45.25584411621094, val loss: 27.404354095458984
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:47:28,337 epoch: 13, train loss: 45.06218719482422, val loss: 27.434616088867188
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:47:35,383 epoch: 14, train loss: 44.908939361572266, val loss: 27.486778259277344
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:47:42,494 epoch: 15, train loss: 44.66334915161133, val loss: 27.623626708984375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:47:49,520 epoch: 16, train loss: 44.54719161987305, val loss: 27.65239143371582
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:47:56,617 epoch: 17, train loss: 44.31052780151367, val loss: 27.657808303833008
  if not is_categorical(df_full[k]):
  if not is_categorical

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:48:26,171 0 genes in training set are missing from prediction set
2020-11-12 15:48:26,813 starting batch 1 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:48:26,869 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_gbm_train_scRNAseq_pdac_val/pollock_module
2020-11-12 15:48:26,870 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Monocyte'
 'Neuron' 'Oligodendrocytes' 'T cells']
2020-11-12 15:48:26,999 0 genes in training set are missing from prediction set
2020-11-12 15:48:51,966 (10000, 9)
2020-11-12 15:48:51,967 {'Monocyte', 'Endothelial', 'T cells', 'Malignant', 'B cell', 'Fibroblast', 'Oligodendrocytes', 'Microglia', 'Neuron'}
2020-11-12 15:48:51,973 starting batch 2 of 2
  if not is_categorical(df_full[k]):
2020-11-12 15:48:52,025 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_gbm_tra

snRNAseq snRNAseq_brca_train_snATACseq_brca_val (2455, 29175) (9028, 19891)


2020-11-12 15:49:14,615 input dataset shape: (11483, 17494)
2020-11-12 15:49:14,619 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:49:14,620 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:49:14,641 train shape: (2455, 17494), val shape: (9028, 17494)
2020-11-12 15:49:14,643 train labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:49:14,643 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:49:23,027 epoch: 1, train loss: 15.670090675354004, val loss: 88.56061553955078
  if not is_categorical(df_full[k]):
  if not is_categorical(df_

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:50:37,014 epoch: 13, train loss: 13.71638011932373, val loss: 86.762939453125
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:50:43,074 epoch: 14, train loss: 13.666613578796387, val loss: 86.75990295410156
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:50:49,267 epoch: 15, train loss: 13.614103317260742, val loss: 86.21795654296875
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:50:55,370 epoch: 16, train loss: 13.57142162322998, val loss: 86.17097473144531
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:51:01,456 epoch: 17, train loss: 13.528273582458496, val loss: 85.70599365234375
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:51:07,643 epoch: 18, train loss: 13.460651397705078, val loss: 85.49415

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:51:25,612 0 genes in training set are missing from prediction set
2020-11-12 15:51:26,017 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 15:51:26,087 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_brca_train_snATACseq_brca_val/pollock_module
2020-11-12 15:51:26,088 ['Adipocyte' 'B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 15:51:26,277 0 genes in training set are missing from prediction set
2020-11-12 15:52:00,698 (9028, 13)
2020-11-12 15:52:00,700 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Adipocyte', 'Malignant', 'B cell', 'Mast', 'Fibroblast', 'Plasma', 'NK'}


snRNAseq snRNAseq_brca_train_snATACseq_gbm_val (2455, 29175) (5650, 19891)


2020-11-12 15:52:07,309 input dataset shape: (8105, 17494)
2020-11-12 15:52:07,312 possible cell types: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 15:52:07,312 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:52:07,329 train shape: (2455, 17494), val shape: (5650, 17494)
2020-11-12 15:52:07,330 train labels: ['Adipocyte', 'B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:52:07,331 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:52:14,806 epoch: 1, train loss: 17.89179039001465, val loss: 31.250410079956055
  if not is_categorical(df_full[

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:53:20,599 epoch: 13, train loss: 15.777240753173828, val loss: 30.162485122680664
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:53:26,060 epoch: 14, train loss: 15.710516929626465, val loss: 30.084972381591797
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:53:31,640 epoch: 15, train loss: 15.662331581115723, val loss: 30.04160499572754
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:53:37,183 epoch: 16, train loss: 15.587051391601562, val loss: 29.898494720458984
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:53:42,754 epoch: 17, train loss: 15.529521942138672, val loss: 29.844036102294922
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:53:48,274 epoch: 18, train loss: 15.482319831848145, val loss: 

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:54:04,110 0 genes in training set are missing from prediction set
2020-11-12 15:54:04,577 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 15:54:04,640 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_brca_train_snATACseq_gbm_val/pollock_module
2020-11-12 15:54:04,641 ['Adipocyte' 'B cell' 'CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial'
 'Fibroblast' 'Malignant' 'Mast' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 15:54:04,755 0 genes in training set are missing from prediction set
2020-11-12 15:54:25,043 (5650, 13)
2020-11-12 15:54:25,044 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'Adipocyte', 'Malignant', 'B cell', 'Fibroblast', 'Mast', 'Plasma', 'NK'}


snRNAseq snRNAseq_ccrcc_train_snATACseq_brca_val (2113, 33538) (9028, 19891)


2020-11-12 15:54:34,932 input dataset shape: (11141, 18895)
2020-11-12 15:54:34,934 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:54:34,935 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:54:34,956 train shape: (2113, 18895), val shape: (9028, 18895)
2020-11-12 15:54:34,958 train labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:54:34,959 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:54:43,574 epoch: 1, train loss: 14.199990272521973, val loss: 93.15211486816406
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-1

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:55:53,089 epoch: 13, train loss: 12.646476745605469, val loss: 94.86984252929688
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:55:58,864 epoch: 14, train loss: 12.007148742675781, val loss: 93.4389419555664
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:56:04,576 epoch: 15, train loss: 12.157752990722656, val loss: 93.32713317871094
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:56:10,288 epoch: 16, train loss: 11.961793899536133, val loss: 93.42365264892578
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:56:15,985 epoch: 17, train loss: 11.981693267822266, val loss: 93.40191650390625
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:56:21,644 epoch: 18, train loss: 12.923271179199219, val loss: 93.00

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:56:38,092 0 genes in training set are missing from prediction set
2020-11-12 15:56:38,406 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 15:56:38,471 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_ccrcc_train_snATACseq_brca_val/pollock_module
2020-11-12 15:56:38,471 ['CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Epithelial'
 'Fibroblast' 'Malignant' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 15:56:38,652 0 genes in training set are missing from prediction set
2020-11-12 15:57:13,900 (9028, 11)
2020-11-12 15:57:13,901 {'Monocyte', 'Endothelial', 'CD4 T cell', 'CD8 T cell', 'Malignant', 'Dendritic', 'Fibroblast', 'Plasma', 'Epithelial', 'NK'}


snRNAseq snRNAseq_ccrcc_train_snATACseq_gbm_val (2113, 33538) (5650, 19891)


2020-11-12 15:57:20,611 input dataset shape: (7763, 18895)
2020-11-12 15:57:20,613 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'Plasma', 'T cells', 'Treg']
2020-11-12 15:57:20,613 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:57:20,631 train shape: (2113, 18895), val shape: (5650, 18895)
2020-11-12 15:57:20,632 train labels: ['CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Epithelial', 'Fibroblast', 'Malignant', 'Monocyte', 'NK', 'Plasma', 'Treg']
2020-11-12 15:57:20,633 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:57:27,844 epoch: 1, train loss: 16.44338607788086, val loss: 33.03181457519531
  if not is_categorical(df_full[k]):
  if not is_categori

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:58:31,214 epoch: 13, train loss: 14.098066329956055, val loss: 32.27467346191406
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:58:36,507 epoch: 14, train loss: 13.739319801330566, val loss: 32.35224914550781
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:58:41,854 epoch: 15, train loss: 14.296427726745605, val loss: 32.18043518066406
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:58:47,129 epoch: 16, train loss: 13.700763702392578, val loss: 32.21456527709961
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:58:52,368 epoch: 17, train loss: 13.931467056274414, val loss: 32.13501739501953
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:58:57,630 epoch: 18, train loss: 13.863207817077637, val loss: 32.1

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 15:59:12,839 0 genes in training set are missing from prediction set
2020-11-12 15:59:13,260 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 15:59:13,318 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_ccrcc_train_snATACseq_gbm_val/pollock_module
2020-11-12 15:59:13,319 ['CD4 T cell' 'CD8 T cell' 'Dendritic' 'Endothelial' 'Epithelial'
 'Fibroblast' 'Malignant' 'Monocyte' 'NK' 'Plasma' 'Treg']
2020-11-12 15:59:13,433 0 genes in training set are missing from prediction set
2020-11-12 15:59:34,471 (5650, 11)
2020-11-12 15:59:34,472 {'Monocyte', 'Endothelial', 'CD4 T cell', 'Treg', 'CD8 T cell', 'Dendritic', 'Malignant', 'Fibroblast', 'Plasma', 'Epithelial', 'NK'}


snRNAseq snRNAseq_gbm_train_snATACseq_brca_val (1689, 29748) (9028, 19891)


2020-11-12 15:59:43,639 input dataset shape: (10717, 17613)
2020-11-12 15:59:43,642 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Microglia', 'Monocyte', 'NK', 'Neuron', 'Oligodendrocytes', 'T cells', 'Treg']
2020-11-12 15:59:43,643 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 15:59:43,668 train shape: (1689, 17613), val shape: (9028, 17613)
2020-11-12 15:59:43,670 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 15:59:43,671 val labels: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Fibroblast', 'Malignant', 'Mast', 'Monocyte', 'NK', 'Treg']
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 15:59:50,444 epoch: 1, train loss: 15.811033248901367, val loss: 91.64494323730469
  if not is_categorical(df_full[k]):
  if not is_categorical(df_f

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 16:00:46,567 epoch: 13, train loss: 13.110577583312988, val loss: 87.7928695678711
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 16:00:51,216 epoch: 14, train loss: 13.037823677062988, val loss: 87.94648742675781
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 16:00:55,885 epoch: 15, train loss: 12.997196197509766, val loss: 88.01280975341797
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 16:01:00,530 epoch: 16, train loss: 12.945866584777832, val loss: 87.77996826171875
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 16:01:05,173 epoch: 17, train loss: 12.894622802734375, val loss: 88.0226058959961
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 16:01:09,842 epoch: 18, train loss: 12.872392654418945, val loss: 87.473

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 16:01:23,848 0 genes in training set are missing from prediction set
2020-11-12 16:01:24,322 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 16:01:24,380 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_gbm_train_snATACseq_brca_val/pollock_module
2020-11-12 16:01:24,381 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Monocyte'
 'Neuron' 'Oligodendrocytes' 'T cells']
2020-11-12 16:01:24,569 0 genes in training set are missing from prediction set
2020-11-12 16:01:59,053 (9028, 9)
2020-11-12 16:01:59,054 {'Endothelial', 'Malignant', 'T cells', 'B cell', 'Fibroblast', 'Microglia', 'Neuron'}


snRNAseq snRNAseq_gbm_train_snATACseq_gbm_val (1689, 29748) (5650, 19891)


2020-11-12 16:02:05,275 input dataset shape: (7339, 17613)
2020-11-12 16:02:05,277 possible cell types: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 16:02:05,278 using validation key
  if not is_categorical(df_full[k]):
2020-11-12 16:02:05,299 train shape: (1689, 17613), val shape: (5650, 17613)
2020-11-12 16:02:05,300 train labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Monocyte', 'Neuron', 'Oligodendrocytes', 'T cells']
2020-11-12 16:02:05,301 val labels: ['B cell', 'Endothelial', 'Fibroblast', 'Malignant', 'Microglia', 'Neuron', 'Oligodendrocytes', 'T cells']
  if not is_categorical(df_full[k]):




2020-11-12 16:02:10,270 5 out of the last 33 calls to <function compute_loss at 0x7fbfea59c3b0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings is likely due to passing python objects instead of tensors. Also, tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. Please refer to https://www.tensorflow.org/tutorials/customization/performance#python_or_tensor_args and https://www.tensorflow.org/api_docs/python/tf/function for more details.
  if not is_categorical(df_full[k]):
2020-11-12 16:02:11,434 epoch: 1, train loss: 17.64562225341797, val loss: 31.190580368041992
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 16:02:15,768 epoch: 2, train loss: 16.52574920654297, val loss: 31.497787475585938
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 16:02:20,071 epoch: 3, train loss: 16.38599967956543, val loss: 

  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 16:03:07,037 epoch: 14, train loss: 14.76716423034668, val loss: 30.16234588623047
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 16:03:11,272 epoch: 15, train loss: 14.689553260803223, val loss: 30.175487518310547
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 16:03:15,497 epoch: 16, train loss: 14.660160064697266, val loss: 30.189579010009766
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 16:03:19,700 epoch: 17, train loss: 14.605735778808594, val loss: 30.070045471191406
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 16:03:23,976 epoch: 18, train loss: 14.532958984375, val loss: 30.05303382873535
  if not is_categorical(df_full[k]):
  if not is_categorical(df_full[k]):
2020-11-12 16:03:28,223 epoch: 19, train loss: 14.535026550292969, val loss: 30.03

False


  d['descr'] = dtype_to_descr(array.dtype)
  if not is_categorical(df_full[k]):
2020-11-12 16:03:36,316 0 genes in training set are missing from prediction set
2020-11-12 16:03:36,705 starting batch 1 of 1
  if not is_categorical(df_full[k]):
2020-11-12 16:03:36,757 /home/estorrs/pollock/benchmarking/results/10272020_teir1_cross_datatype/snRNAseq/snRNAseq_gbm_train_snATACseq_gbm_val/pollock_module
2020-11-12 16:03:36,758 ['B cell' 'Endothelial' 'Fibroblast' 'Malignant' 'Microglia' 'Monocyte'
 'Neuron' 'Oligodendrocytes' 'T cells']
2020-11-12 16:03:36,869 0 genes in training set are missing from prediction set
2020-11-12 16:03:57,258 (5650, 9)
2020-11-12 16:03:57,260 {'Endothelial', 'T cells', 'Malignant', 'B cell', 'Fibroblast', 'Microglia', 'Oligodendrocytes', 'Neuron'}


###### testing stuff

In [22]:
# a = sc.read_h5ad(adata_map['snATACseq']['gbm']['train'])
# a

In [13]:
train, val = sc.read_h5ad(adata_map['scRNAseq']['pbmc']['train']), sc.read_h5ad(adata_map['scRNAseq']['brca']['val'])

In [14]:
module_dir = os.path.join(SANDBOX_DIR, 'temp_module')

In [15]:
train.obs['is_validation'] = [False] * train.shape[0]
val.obs['is_validation'] = [True] * val.shape[0]
combined = train.concatenate(val)
combined

AnnData object with n_obs × n_vars = 12193 × 18511
    obs: 'leiden', 'cell_type', 'is_validation', 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'percent.mito', 'nCount_SCT', 'nFeature_SCT', 'SCT_snn_res.0.5', 'seurat_clusters', 'sample', 'tissue_type', 'cell_type_specific', 'Piece_ID', 'Clinical_Subtype', 'Bulk_PAM50', 'doublet_score', 'predicted_doublet', 'ident', 'batch'
    var: 'gene_ids-0', 'sct.detection_rate-1', 'sct.gmean-1', 'sct.variance-1', 'sct.residual_mean-1', 'sct.residual_variance-1', 'sct.variable-1'

In [18]:
train.shape, val.shape

((940, 32738), (11253, 27131))

In [17]:
np.count_nonzero(combined.obs['is_validation']), np.count_nonzero(~combined.obs['is_validation'])

(11253, 940)

In [None]:
# pds = PollockDataset(train, cell_type_key=CELL_TYPE_KEY,
#                      dataset_type='training')

In [24]:
val.shape

(11253, 27131)

In [26]:
pds = PollockDataset(combined, cell_type_key=CELL_TYPE_KEY,
                     dataset_type='training', validation_key='is_validation')

2020-11-12 09:46:56,627 normalizing the expression counts for model training
2020-11-12 09:47:01,436 input dataset shape: (12193, 18511)
2020-11-12 09:47:01,439 possible cell types: ['B cell', 'CD4 T cell', 'CD8 T cell', 'Dendritic', 'Endothelial', 'Erythrocyte', 'Fibroblast', 'Malignant', 'Mast', 'Megakaryocyte', 'Monocyte', 'NK', 'Plasma', 'Treg']
  if not is_categorical(df_full[k]):
2020-11-12 09:47:01,452 train shape: (940, 18511), val shape: (11253, 18511)
2020-11-12 09:47:03,561 training dataset shape: (940, 18511)
2020-11-12 09:47:03,563 validation dataset shape: (11253, 18511)


In [None]:
pm = PollockModel(pds.cell_types, pds.train_adata.shape[1], alpha=.0001, latent_dim=25)

In [None]:
pm.fit(pds, epochs=2)

In [None]:
pm.save(pds, module_dir)

In [None]:
val.shape

In [None]:
preds = predict_from_anndata(val.copy(),
        '/home/estorrs/pollock/benchmarking/sandbox/temp_module', adata_batch_size=10000)
preds

In [None]:
df = pd.DataFrame.from_dict({
    'cell_id': preds.index.to_list(),
    'groundtruth': val.obs.loc[preds.index][CELL_TYPE_KEY].to_list(),
    'predicted': preds['predicted_cell_type'],
    'probability': preds['cell_type_probability']
})
df

##### scanpy ingest

In [None]:
def ingest_preprocess(adata):
    adata.var['mt'] = adata.var_names.str.startswith('MT-')
    sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, flavor="seurat", n_top_genes=2500)
    adata.raw = adata
    adata = adata[:, adata.var.highly_variable]
    sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt'])
    sc.pp.scale(adata)
    
    return adata

def run_scanpy_workflow(train, val, cell_type_key):
    var_names = train.var_names.intersection(val.var_names)
    train = train[:, var_names]
    val = val[:, var_names]
    
    groundtruth = val.obs[cell_type_key].to_list()

    sc.pp.pca(train)
    sc.pp.neighbors(train)
    sc.tl.umap(train)
    
    sc.tl.ingest(val, train, obs=cell_type_key)
    
    df = pd.DataFrame.from_dict({
        'cell_id': val.obs.index.to_list(),
        'groundtruth': groundtruth,
        'predicted': val.obs[cell_type_key].to_list(),
        'probability': [np.nan] * val.shape[0]
    })
    
    return df

In [None]:
run_workflow_for_datasets(adata_map, run_scanpy_workflow, 'scanpy_ingest', RESULTS_DIR)

In [None]:
run_workflow_for_cross_disease(adata_map, run_scanpy_workflow, 'scanpy_ingest', RESULTS_CROSS_DISEASE_DIR)

In [None]:
run_workflow_for_cross_datatype(adata_map, run_scanpy_workflow, 'scanpy_ingest', RESULTS_CROSS_DTYPE_DIR)

###### testing stuff

In [None]:
train, val = adata_map['scRNAseq']['pbmc']['train'].copy(), adata_map['scRNAseq']['pbmc']['val'].copy()

In [None]:
train, val = ingest_preprocess(train), ingest_preprocess(val)

var_names = train.var_names.intersection(val.var_names)
train = train[:, var_names]
val = val[:, var_names]

sc.pp.pca(train)
sc.pp.neighbors(train)
sc.tl.umap(train)

In [None]:
sc.pl.umap(train, color='cell_type')

In [None]:
sc.tl.ingest(val, train, obs=CELL_TYPE_KEY)
val.uns[f'{CELL_TYPE_KEY}_colors'] = train.uns[f'{CELL_TYPE_KEY}_colors']

In [None]:
sc.pl.umap(val, color=[CELL_TYPE_KEY], wspace=0.5)


In [None]:
val

In [None]:
val.obs

##### ACTINN

In [None]:
def run_actinn_workflow(train, val, cell_type_key):
    X = train.X.toarray() if 'sparse' in str(type(train.X)) else train.X
    train_counts_df = pd.DataFrame(data=X.transpose(), index=train.var.index.to_list(),
                        columns=train.obs.index.to_list())
    X = val.X.toarray() if 'sparse' in str(type(val.X)) else val.X
    val_counts_df = pd.DataFrame(data=X.transpose(), index=val.var.index.to_list(),
                        columns=val.obs.index.to_list())
    
    train_counts_fp = os.path.join(SANDBOX_DIR, 'train_counts.txt')
    val_counts_fp = os.path.join(SANDBOX_DIR, 'val_counts.txt')
    train_counts_df.to_csv(train_counts_fp, sep='\t')
    val_counts_df.to_csv(val_counts_fp, sep='\t')
    
    train_h5_fp = os.path.join(SANDBOX_DIR, 'train.h5')
    train_annotations_fp = os.path.join(SANDBOX_DIR, 'train_annotations.txt')
    val_h5_fp = os.path.join(SANDBOX_DIR, 'val.h5')

    train.obs[[CELL_TYPE_KEY]].to_csv(train_annotations_fp, sep='\t', index=True, header=False)

    subprocess.check_output(('python', ACTINN_FORMAT, '-i', train_counts_fp,
                            '-o', train_h5_fp.replace('.h5', ''), '-f', 'txt'))
    subprocess.check_output(('python', ACTINN_FORMAT, '-i', val_counts_fp,
                            '-o', val_h5_fp.replace('.h5', ''), '-f', 'txt'))
    # dont use probablity argument or it breaks
    subprocess.check_output(('python', ACTINN_PREDICT, '-trs', train_h5_fp,
                            '-trl', train_annotations_fp, '-ts', val_h5_fp))
    
    prediction_df = pd.read_csv('predicted_label.txt', sep='\t')
    
    df = pd.DataFrame.from_dict({
        'cell_id': prediction_df['cellname'].to_list(),
        'predicted': prediction_df['celltype'].to_list(),
        'probability': [np.nan] * prediction_df.shape[0]
    })
    
    df = pd.merge(df, val.obs, left_on='cell_id', right_index=True)
    df = df[['cell_id', 'cell_type', 'predicted', 'probability']]
    df.columns = ['cell_id', 'groundtruth', 'predicted', 'probability']
    
    return df
    
    
    
    


In [None]:
ACTINN_FORMAT = '/home/estorrs/ACTINN/actinn_format.py'
ACTINN_PREDICT = '/home/estorrs/ACTINN/actinn_predict.py'

run_workflow_for_datasets(adata_map, run_actinn_workflow, 'actinn', RESULTS_DIR)

###### testing stuff

In [None]:
train, val = adata_map['scRNAseq']['pbmc']['train'].copy(), adata_map['scRNAseq']['pbmc']['val'].copy()

In [None]:
# train.obs['dataset'] = ['train'] * train.shape[0]
# val.obs['dataset'] = ['val'] * val.shape[0]
# combined = train.concatenate(val)
# combined

In [None]:
train_counts_df = pd.DataFrame(data=train.X.transpose().toarray(), index=train.var.index.to_list(),
                        columns=train.obs.index.to_list())
val_counts_df = pd.DataFrame(data=val.X.transpose().toarray(), index=val.var.index.to_list(),
                        columns=val.obs.index.to_list())
train_counts_df

In [None]:
train_counts_fp = os.path.join(SANDBOX_DIR, 'train_counts.txt')
val_counts_fp = os.path.join(SANDBOX_DIR, 'val_counts.txt')
train_counts_df.to_csv(train_counts_fp, sep='\t')
val_counts_df.to_csv(val_counts_fp, sep='\t')

python actinn_format.py -i input_file -o output_prefix -f format

python actinn_format.py -i ./test_data/train_set.txt.gz -o train_set -f txt


In [None]:
train_h5_fp = os.path.join(SANDBOX_DIR, 'train.h5')
train_annotations_fp = os.path.join(SANDBOX_DIR, 'train_annotations.txt')
val_h5_fp = os.path.join(SANDBOX_DIR, 'val.h5')

train.obs[[CELL_TYPE_KEY]].to_csv(train_annotations_fp, sep='\t', index=True, header=False)

subprocess.check_output(('python', '/home/estorrs/ACTINN/actinn_format.py', '-i', train_counts_fp,
                        '-o', train_h5_fp.replace('.h5', ''), '-f', 'txt'))

In [None]:
subprocess.check_output(('python', '/home/estorrs/ACTINN/actinn_format.py', '-i', val_counts_fp,
                        '-o', val_h5_fp.replace('.h5', ''), '-f', 'txt'))

In [None]:
train.obs[[CELL_TYPE_KEY]]

python actinn_predict.py -trs training_set -trl training_label -ts test_set -lr learning_rat -ne num_epoch -ms minibatch_size -pc print_cost -op output_probability


-trs Path to the training set, must be HDF5 format with key "dge".

-trl Path to the training label (the cell types for the training set), must be tab separated text file with no column and row names.

-ts Path to test sets, must be HDF5 format with key "dge".

-lr Learning rate (default: 0.0001). We can increase the learning rate if the cost drops too slow, or decrease the learning rate if the cost drops super fast in the beginning and starts to fluctuate in later epochs.

-ne Number of epochs (default: 50). The number of epochs can be determined by looking at the cost after each epoch. If the cost starts to decrease very slowly after ceartain epoch, then the "ne" parameter should be set to that epoch number.

-ms Minibatch size (default: 128). This parameter can be set larger when training a large dataset.

-pc Print cost (default: True). Whether to print cost after each 5 epochs.

-op Output probabilities for each cell being the cell types in the training data (default: False).


In [None]:
subprocess.check_output(('python', '/home/estorrs/ACTINN/actinn_predict.py', '-trs', train_h5_fp,
                        '-trl', train_annotations_fp, '-ts', val_h5_fp))

In [None]:
' '.join(('python', '/home/estorrs/ACTINN/actinn_predict.py', '-trs', train_h5_fp,
                        '-trl', train_annotations_fp, '-ts', val_h5_fp,
                        '-op', 'True'))

In [None]:
prediction_df = pd.read_csv('predicted_label.txt', sep='\t')
prediction_df

In [None]:
df = pd.DataFrame.from_dict({
        'cell_id': prediction_df['cellname'].to_list(),
        'prediction': prediction_df['celltype'].to_list(),
        'probability': [np.nan] * val.shape[0]
    })
df

In [None]:
val.obs

In [None]:
df = pd.merge(df, val.obs, left_on='cell_id', right_index=True)
df = df[['cell_id', 'cell_type', 'prediction', 'probability']]
df.columns = ['cell_id', 'groundtruth', 'prediction', 'probability']
df


##### Seurat

In [None]:
def run_seurat_transfer(train, val, cell_type_key):
    # save the input data for the seurat script
    train_counts_fp, val_counts_fp = (os.path.join(SANDBOX_DIR, 'train_counts.txt'),
                                        os.path.join(SANDBOX_DIR, 'val_counts.txt'))
    train_annotations_fp, val_annotations_fp = (os.path.join(SANDBOX_DIR, 'train_annotations.txt'),
                                                os.path.join(SANDBOX_DIR, 'val_annotations.txt'))

    ## prepare train and val count matrices
    X = train.X.toarray() if 'sparse' in str(type(train.X)) else train.X
    train_counts = pd.DataFrame(data=X.transpose().astype(np.int32), index=train.var.index,
                                columns=train.obs.index)
    train_counts.index.name = ''
    # for some reason SCTransform fails if the integer values are too high, so capping them here
    cap = pow(2, 14)
    train_counts.values[train_counts.values>cap] = cap
    train_counts.to_csv(train_counts_fp, sep='\t', header=True, index=True)
    
    X = val.X.toarray() if 'sparse' in str(type(val.X)) else val.X
    val_counts = pd.DataFrame(data=X.transpose().astype(np.int32), index=val.var.index,
                                columns=val.obs.index)
    val_counts.index.name = ''
    val_counts.values[val_counts.values>cap] = cap
    val_counts.to_csv(val_counts_fp, sep='\t', header=True, index=True)

    train.obs[[CELL_TYPE_KEY]].to_csv(train_annotations_fp, sep='\t', header=False, index=False)
    val.obs[[CELL_TYPE_KEY]].to_csv(val_annotations_fp, sep='\t', header=False, index=False)
    
    # actually run the script and read the results back in
    prediction_fp = os.path.join(SANDBOX_DIR, 'seurat_predictions.txt')
    try:
        subprocess.check_output(('Rscript', SEURAT_SCRIPT, train_counts_fp, train_annotations_fp,
                            val_counts_fp, val_annotations_fp, prediction_fp))
    except subprocess.CalledProcessError as e:
        print(f'called process error', e)
        return pd.DataFrame()
    
    # format the predictions dataframe
    df = pd.read_csv(prediction_fp, sep='\t')
    df.index = [x.replace('.', '-') for x in df.index]
    # also remove that weird X thing seurat sometimes puts there if first char is _
    df.index = [x[1:] if x[:2]=='X_' else x for x in df.index]
    df = pd.merge(df, val.obs, left_index=True, right_index=True)
    df['cell_id'] = df.index.to_list()
    try:
        df = df[['cell_id', 'cell_type', 'predicted.id', 'prediction.score.max']]        
        df.columns = ['cell_id', 'groundtruth', 'predicted', 'probability']
        return df
    except KeyError as e:
        print(f'key error', e)
        return pd.DataFrame()

In [None]:
SEURAT_SCRIPT = '/home/estorrs/pollock/benchmarking/tools/run_seurat_workflow.R'
run_workflow_for_datasets(adata_map, run_seurat_transfer, 'seurat_transfer', RESULTS_DIR)

In [None]:
run_workflow_for_cross_disease(adata_map, run_seurat_transfer, 'seurat_transfer', RESULTS_CROSS_DISEASE_DIR)

In [None]:
run_workflow_for_cross_datatype(adata_map, run_seurat_transfer, 'seurat_transfer', RESULTS_CROSS_DTYPE_DIR)

###### testing stuff

In [None]:
# train, val = adata_map['scRNAseq']['pbmc']['train'].copy(), adata_map['scRNAseq']['pbmc']['val'].copy()
train, val = sc.read_h5ad(adata_map['scRNAseq']['brca']['train']), sc.read_h5ad(adata_map['scRNAseq']['brca']['val'])

In [None]:
pow(2, 14)

In [None]:
# save the input data for the seurat script
train_counts_fp, val_counts_fp = (os.path.join(SANDBOX_DIR, 'train_counts.txt'),
                                    os.path.join(SANDBOX_DIR, 'val_counts.txt'))
train_annotations_fp, val_annotations_fp = (os.path.join(SANDBOX_DIR, 'train_annotations.txt'),
                                            os.path.join(SANDBOX_DIR, 'val_annotations.txt'))

## prepare train and val count matrices
X = train.X.toarray() if 'sparse' in str(type(train.X)) else train.X
train_counts = pd.DataFrame(data=X.transpose().astype(np.int32), index=train.var.index,
                            columns=train.obs.index)
train_counts.index.name = ''
# for some reason SCTransform fails if the integer values are too high, so capping them here
cap = pow(2, 14)
train_counts.values[train_counts.values>cap] = cap
train_counts.to_csv(train_counts_fp, sep='\t', header=True, index=True)

X = val.X.toarray() if 'sparse' in str(type(val.X)) else val.X
val_counts = pd.DataFrame(data=X.transpose().astype(np.int32), index=val.var.index,
                            columns=val.obs.index)
val_counts.index.name = ''
val_counts.values[val_counts.values>cap] = cap
val_counts.to_csv(val_counts_fp, sep='\t', header=True, index=True)

train.obs[[CELL_TYPE_KEY]].to_csv(train_annotations_fp, sep='\t', header=False, index=False)
val.obs[[CELL_TYPE_KEY]].to_csv(val_annotations_fp, sep='\t', header=False, index=False)

In [None]:
train_counts

In [None]:
train_counts

In [None]:
type(train_counts.values), type(train_counts.values[0, 0])

In [None]:
vals = sorted(set(train_counts.values.flatten()))
vals

In [None]:
vals[:10], vals[-10:]

In [None]:
train_counts.values[train_counts.values>1000] = 1000

In [None]:
np.where(train_counts>1)

In [None]:
# actually run the script and read the results back in
prediction_fp = os.path.join(SANDBOX_DIR, 'seurat_predictions.txt')
subprocess.check_output(('Rscript', SEURAT_SCRIPT, train_counts_fp, train_annotations_fp,
                    val_counts_fp, val_annotations_fp, prediction_fp))

In [None]:
# format the predictions dataframe
df = pd.read_csv(prediction_fp, sep='\t')
df.index = [x.replace('.', '-') for x in df.index]
# also remove that weird X thing seurat sometimes puts there
df.index = [x[1:] if x[:2]=='X_' else x for x in df.index]
df = pd.merge(df, val.obs, left_index=True, right_index=True)
df['cell_id'] = df.index.to_list()
df = df[['cell_id', 'cell_type', 'predicted.id', 'prediction.score.max']]        
df.columns = ['cell_id', 'groundtruth', 'predicted', 'probability']
df

In [None]:
val.obs

##### SingleCellNet

In [None]:
# !pip install git+https://github.com/pcahan1/PySingleCellNet/

In [None]:
import pySingleCellNet as pySCN

In [None]:
def run_SingleCellNet(train, val, cell_type_key):
    # save the input data for the seurat script
    cgenesA, xpairs, tspRF = pySCN.scn_train(train,
            nTopGenes=100, nRand=100, nTrees=1000, nTopGenePairs=100,
            dLevel=cell_type_key, stratify=True, limitToHVG=True, )
    predictions = pySCN.scn_classify(val, cgenesA, xpairs, tspRF, nrand = 0)
    
    df = pd.merge(predictions.obs[['SCN_class']], val.obs, left_index=True, right_index=True)
    
    df = df[['cell_type', 'SCN_class']]
    df.columns = ['groundtruth', 'predicted']
    df['cell_id'] = df.index.to_list()
    df['probability'] = [np.nan] * df.shape[0]
    df = df[['cell_id', 'groundtruth', 'predicted', 'probability']]
    

    return df

In [None]:
run_workflow_for_datasets(adata_map, run_SingleCellNet, 'SingleCellNet', RESULTS_DIR)

In [None]:
run_workflow_for_cross_disease(adata_map, run_SingleCellNet, 'SingleCellNet', RESULTS_CROSS_DISEASE_DIR)

In [None]:
run_workflow_for_cross_datatype(adata_map, run_SingleCellNet, 'SingleCellNet', RESULTS_CROSS_DTYPE_DIR)

###### testing stuff

In [None]:
train, val = adata_map['scRNAseq']['pbmc']['train'].copy(), adata_map['scRNAseq']['pbmc']['val'].copy()

In [None]:
cgenesA, xpairs, tspRF = pySCN.scn_train(train,
            nTopGenes = 100, nRand = 100, nTrees = 1000 ,nTopGenePairs = 100,
            dLevel = "cell_type", stratify=True, limitToHVG=True, )

In [None]:
predictions = pySCN.scn_classify(val, cgenesA, xpairs, tspRF, nrand = 0)


In [None]:
predictions.obs

In [None]:
df = pd.merge(predictions.obs[['SCN_class']], val.obs, left_index=True, right_index=True)

df = df[['cell_type', 'SCN_class']]
df.index.name = 'cell_id'
df.columns = ['groundtruth', 'predictions']
df['probability'] = [np.nan] * df.shape[0]
df

##### pollock

###### testing stuff