In [2]:
import argparse
import os
from epitome.models import *
from epitome.functions import *
from epitome.viz  import *

from epitome.constants import *
from epitome.motif_functions import *
import yaml
import subprocess
from timeit import default_timer as timer

examples.directory is deprecated; in the future, examples will be found relative to the 'datapath' directory.
  "found relative to the 'datapath' directory.".format(key))


### Set Up

In [16]:
# --- What to predict ------------------------------------------------------
# Transcription factor being predicted, and the cell-type tag that is
# embedded in the names of the saved model / result files.
TF = "JUND"
query_cell = 'K562'  # alternative left by the author: 'T47D'

# Iteration counts handed to model.train() and model.test() respectively.
train_iterations, test_iterations = 5000, 10000

# --- Where things live on disk ---------------------------------------------
motif_dir = "data/motif_data/"
epitome_data_path = "data/epitome_data"
results_path = "results"
feature_path = os.path.join(epitome_data_path, "feature_name")

In [17]:
def setup_directories(TF, results_path=results_path):
    """Create the per-TF output folders (if missing) and return their paths.

    The following layout is ensured to exist under ``results_path``::

        <results_path>/epitome_results/<TF>_results
        <results_path>/epitome_models/<TF>_models

    Args:
        TF: Name of the transcription factor being predicted; used as the
            prefix of the two leaf folder names.
        results_path: Root directory for all outputs. The default is bound
            to the module-level ``results_path`` when this function is
            defined.

    Returns:
        Tuple ``(tf_results_dir, tf_model_dir)`` with the two leaf paths.
    """
    epitome_results_dir = os.path.join(results_path, "epitome_results")
    epitome_models_dir = os.path.join(results_path, "epitome_models")

    # Folders based on the TF being predicted.
    tf_results_dir = os.path.join(epitome_results_dir, TF + "_results")
    tf_model_dir = os.path.join(epitome_models_dir, TF + "_models")

    # exist_ok=True makes creation idempotent. The earlier version guarded
    # the models directory with a check on the *results* directory, so the
    # guard never reflected the models directory's actual state.
    # Creating each leaf also creates its parent directory.
    for directory in (tf_results_dir, tf_model_dir):
        os.makedirs(directory, exist_ok=True)

    return tf_results_dir, tf_model_dir

# Ensure the output folders for the configured TF exist and keep their paths
# for the save steps further down.
tf_results_dir, tf_model_dir = setup_directories(TF)

### Load in Data for Epitome

In [18]:
def _load_dense_split(split_name):
    """Read ``<epitome_data_path>/<split_name>.npz`` and return it densified."""
    npz_path = os.path.join(epitome_data_path, split_name + '.npz')
    return scipy.sparse.load_npz(npz_path).toarray()


train_data = _load_dense_split('train')
valid_data = _load_dense_split('valid')
test_data = _load_dense_split('test')

# Map every Dataset split to its dense matrix; this dict is what the models consume.
data = {
    Dataset.TRAIN: train_data,
    Dataset.VALID: valid_data,
    Dataset.TEST: test_data,
}
# all_data = np.concatenate((data[Dataset.TRAIN], data[Dataset.VALID], data[Dataset.TEST]), axis=1)

In [19]:
# Read the "tf" array out of the motif .npz archive. Opening the archive as a
# context manager closes its file handle once the array has been read,
# instead of leaving the handle open for the rest of the session.
with np.load(os.path.join(motif_dir, "OVERLAP_HOCOMOCO_unique_motifmat.npz")) as motif_archive:
    motifmat = motif_archive["tf"]

# The motif map CSV is read without a header row; its first two columns are
# then named "Index" and "TF".
motifmap = pd.read_csv(
    os.path.join(motif_dir, "OVERLAP_HOCOMOCO_unique_motifmap.csv"),
    header=None,
).rename(columns={0: "Index", 1: "TF"})

In [8]:
# Anchor TFs of interest.
anchor_tfs = [
    "CTCF", "E2F1", "EGR1", "FOXA1", "FOXA2", "GABPA",
    "HNF4A", "JUND", "MAX", "NANOG", "REST", "TAF1",
]

# Every TF named in the motif map, plus "DNase".
epitome_tfs = list(motifmap["TF"].unique()) + ["DNase"]

# Anchor TFs that also appear in the list above.
anchor_overlap_tfs = set(epitome_tfs) & set(anchor_tfs)

# Displayed: number of anchors, number of usable anchors, and the usable set.
len(anchor_tfs), len(anchor_overlap_tfs), anchor_overlap_tfs

(12,
 9,
 {'CTCF', 'E2F1', 'EGR1', 'FOXA1', 'GABPA', 'JUND', 'MAX', 'REST', 'TAF1'})

### Train VLP Model With Motif Data

In [21]:
# Build matrix / cellmap / assaymap for the configured TF from the feature file.
# NOTE(review): TF is passed as a bare string rather than a list. If the helper
# does membership tests against eligible_assays, a string would match by
# substring -- confirm that [TF] is not what was intended (the recorded
# predictions further down have two columns for this single TF).
assay_filter = dict(
    eligible_assays=TF,
    eligible_cells=None,
    min_cells_per_assay=2,
    min_assays_per_cell=2,  # the author left a commented-out alternative of 10 here
)
matrix, cellmap, assaymap = get_assays_from_feature_file(feature_path, **assay_filter)

In [22]:
# Motif-aware model: motifmat / motifmap are supplied on top of the assay data.
motif_model_kwargs = dict(
    data=data,
    matrix=matrix,
    cellmap=cellmap,
    assaymap=assaymap,
    motifmat=motifmat,
    motifmap=motifmap,
)
model = VLP(TF, **motif_model_kwargs)

# Wall-clock the training run; train_time is written to the results table later.
start = timer()
model.train(train_iterations)
end = timer()
train_time = end - start
print('epitome train time %f' % train_time)

# Save the trained model under <tf_model_dir>/<query_cell>_<TF>_motif.
model_path = os.path.join(tf_model_dir, "_".join([query_cell, TF, "motif"]))
model.save(model_path)

using ['T47D', 'SK-N-SH', 'MCF-7', 'K562', 'HepG2', 'HeLa-S3', 'HCT116', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.TRAIN
using ['T47D', 'SK-N-SH', 'MCF-7', 'K562', 'HepG2', 'HeLa-S3', 'HCT116', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(71.51683, shape=(), dtype=float32)tf.Tensor(32.367836, shape=(), dtype=float32)tf.Tensor(39.148994, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(43.75319, shape=(), dtype=float32)tf.Tensor(17.765388, shape=(), dtype=float32)tf.Tensor(25.9878, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(27.510347, shape=(), dtype=float32)tf.Tensor(10.572031, shape=(), dtype=float32)tf.Tensor(16.938316, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(27.68876, shape=(), dtype=float32)tf.Tensor(15.903814, shape=(), dtype=float32)tf.Tensor(11.7849455, shape=(), dtype=float32)
INFO:tensorflow:4000 tf.Tensor(24.891537, shape=(), dtype=float32)tf.Tensor(15.9134

In [23]:
# Evaluate the motif model and report the two headline metrics.
model_results = model.test(test_iterations, calculate_metrics=True)
auroc, auprc = model_results['auROC'], model_results['auPRC']
print('Model auROC: %s. Model auPRC: %s.' % (auroc, auprc))

157it [00:39,  3.94it/s]

INFO:tensorflow:macro auROC:     0.9084975686826244
INFO:tensorflow:auPRC:     0.02531934997275975
INFO:tensorflow:GINI:     0.5623797544778633
Model auROC: 0.9084975686826244. Model auPRC: 0.02531934997275975.





In [24]:
# One-row summary of the motif-model evaluation.
# The row is built directly from a dict rather than appended to an empty
# frame: DataFrame.append is deprecated in pandas 1.4 and removed in 2.0.
# The columns are listed explicitly so the header order of the CSV is fixed.
eval_record = {
    # Only the single TF is recorded; the author left a commented-out
    # alternative that joined all of anchor_overlap_tfs.
    'predicted_transcription_factor': TF,
    'query_cell': query_cell,
    'auROC': model_results['auROC'],
    'auPRC': model_results['auPRC'],
    'trained_transcription_factors': TF,
    'iterations_trained': train_iterations,
    'iterations_tested': test_iterations,
    'train_time': train_time,
}
eval_columns = [
    'query_cell', 'auROC', 'auPRC',
    'predicted_transcription_factor', 'trained_transcription_factors',
    'iterations_trained', 'iterations_tested', 'train_time',
]
eval_results_df = pd.DataFrame([eval_record], columns=eval_columns)
eval_results_df.to_csv(os.path.join(tf_results_dir, query_cell + "_" + TF + '_motif.csv'),
                       sep="\t")

# Store the mean predictions next to the metrics table.
preds_file = os.path.join(tf_results_dir, query_cell + "_" + TF + '_motif.npz')
np.savez_compressed(preds_file, pred=model_results['preds_mean'].numpy())

In [29]:
# Show the mean predictions converted to a NumPy array.
model_results['preds_mean'].numpy()

array([[0.03256017, 0.02350445],
       [0.12821168, 0.04365126],
       [0.064019  , 0.03120099],
       ...,
       [0.07639625, 0.03591267],
       [0.01666835, 0.0120222 ],
       [0.01700682, 0.00828141]], dtype=float32)

In [30]:
# Show the full results dictionary returned by model.test().
model_results

{'preds_mean': <tf.Tensor: shape=(10000, 2), dtype=float32, numpy=
 array([[0.03256017, 0.02350445],
        [0.12821168, 0.04365126],
        [0.064019  , 0.03120099],
        ...,
        [0.07639625, 0.03591267],
        [0.01666835, 0.0120222 ],
        [0.01700682, 0.00828141]], dtype=float32)>,
 'preds_std': <tf.Tensor: shape=(10000, 2), dtype=float32, numpy=
 array([[0.02154262, 0.01981107],
        [0.05854309, 0.02100998],
        [0.04072119, 0.02334389],
        ...,
        [0.03715775, 0.02083966],
        [0.01057944, 0.00872744],
        [0.0090576 , 0.00503344]], dtype=float32)>,
 'truth': <tf.Tensor: shape=(10000, 2), dtype=float32, numpy=
 array([[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]], dtype=float32)>,
 'weights': <tf.Tensor: shape=(10000, 2), dtype=float32, numpy=
 array([[1., 0.],
        [1., 0.],
        [0., 1.],
        ...,
        [1., 1.],
        [1., 0.],
        [1., 1.]], dtype=floa

### VLP Model Without Motif Data

In [26]:
# Rebuild matrix / cellmap / assaymap for the configured TF before training
# the model that does not use motif data.
# NOTE(review): TF is passed as a bare string rather than a list -- confirm
# that [TF] is not what eligible_assays expects.
assay_filter = dict(
    eligible_assays=TF,
    eligible_cells=None,
    min_cells_per_assay=2,
    min_assays_per_cell=2,  # the author left a commented-out alternative of 10 here
)
matrix, cellmap, assaymap = get_assays_from_feature_file(feature_path, **assay_filter)

In [27]:
# Baseline model: same inputs as the motif model, minus motifmat / motifmap.
baseline_model_kwargs = dict(
    data=data,
    matrix=matrix,
    cellmap=cellmap,
    assaymap=assaymap,
)
model = VLP(TF, **baseline_model_kwargs)

# Wall-clock the training run; train_time is written to the results table later.
start = timer()
model.train(train_iterations)
end = timer()
train_time = end - start
print('epitome train time %f' % train_time)

# Save the trained model under <tf_model_dir>/<query_cell>_<TF>_no_motif.
model_path = os.path.join(tf_model_dir, "_".join([query_cell, TF, "no_motif"]))
model.save(model_path)

using ['T47D', 'SK-N-SH', 'MCF-7', 'K562', 'HepG2', 'HeLa-S3', 'HCT116', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.TRAIN
using ['T47D', 'SK-N-SH', 'MCF-7', 'K562', 'HepG2', 'HeLa-S3', 'HCT116', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(59.26118, shape=(), dtype=float32)tf.Tensor(33.030785, shape=(), dtype=float32)tf.Tensor(26.230398, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(42.590797, shape=(), dtype=float32)tf.Tensor(25.06586, shape=(), dtype=float32)tf.Tensor(17.524937, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(34.080284, shape=(), dtype=float32)tf.Tensor(22.320139, shape=(), dtype=float32)tf.Tensor(11.760146, shape=(), dtype=float32)


KeyboardInterrupt: 

In [None]:
# Evaluate the no-motif model and report the two headline metrics.
model_results = model.test(test_iterations, calculate_metrics=True)
auroc, auprc = model_results['auROC'], model_results['auPRC']
print('Model auROC: %s. Model auPRC: %s.' % (auroc, auprc))

In [None]:
# One-row summary of the no-motif-model evaluation.
# The row is built directly from a dict rather than appended to an empty
# frame: DataFrame.append is deprecated in pandas 1.4 and removed in 2.0.
# The columns are listed explicitly so the header order of the CSV is fixed.
eval_record = {
    # Only the single TF is recorded; the author left a commented-out
    # alternative that joined all of anchor_overlap_tfs.
    'predicted_transcription_factor': TF,
    'query_cell': query_cell,
    'auROC': model_results['auROC'],
    'auPRC': model_results['auPRC'],
    'trained_transcription_factors': TF,
    'iterations_trained': train_iterations,
    'iterations_tested': test_iterations,
    'train_time': train_time,
}
eval_columns = [
    'query_cell', 'auROC', 'auPRC',
    'predicted_transcription_factor', 'trained_transcription_factors',
    'iterations_trained', 'iterations_tested', 'train_time',
]
eval_results_df = pd.DataFrame([eval_record], columns=eval_columns)
eval_results_df.to_csv(os.path.join(tf_results_dir, query_cell + "_" + TF + '_no_motif.csv'),
                       sep="\t")

# Store the mean predictions next to the metrics table.
preds_file = os.path.join(tf_results_dir, query_cell + "_" + TF + '_no_motif.npz')
np.savez_compressed(preds_file, pred=model_results['preds_mean'].numpy())

### Train / Evaluate on All Anchor TFs

In [58]:
# For every usable anchor TF, train and evaluate two models -- one with motif
# data and one without -- saving each model, its metrics table and its mean
# predictions.

# (file-name tag, label used in the timing message, extra VLP keyword arguments)
model_variants = (
    ("motif", "Motif", {"motifmat": motifmat, "motifmap": motifmap}),
    ("no_motif", "Non-Motif", {}),
)

# Fixed column order for the one-row metrics tables written below.
eval_columns = [
    'query_cell', 'auROC', 'auPRC',
    'predicted_transcription_factor', 'trained_transcription_factors',
    'iterations_trained', 'iterations_tested', 'train_time',
]

# The loop variable is deliberately not named `tf`: that is the conventional
# TensorFlow alias and may already be bound by the star imports at the top of
# the notebook -- TODO confirm.
for anchor_tf in anchor_overlap_tfs:
    # TF being predicted in this iteration.
    TF = anchor_tf

    # CTCF is skipped; the reason is not recorded in the notebook.
    if TF == "CTCF":
        continue

    print("Training %s..." % TF)
    query_cell = 'K562'  # alternative left by the author: 'T47D'

    tf_results_dir, tf_model_dir = setup_directories(TF)

    matrix, cellmap, assaymap = get_assays_from_feature_file(feature_path,
                                                             eligible_assays=TF,
                                                             eligible_cells=None,
                                                             min_cells_per_assay=2,
                                                             min_assays_per_cell=2)

    # Motif model first, then the no-motif model, through the same code path.
    for variant, variant_label, motif_kwargs in model_variants:
        model = VLP(TF,
                    data=data,
                    matrix=matrix,
                    cellmap=cellmap,
                    assaymap=assaymap,
                    **motif_kwargs)

        start = timer()
        model.train(train_iterations)
        end = timer()
        train_time = end - start
        print('%s %s Train Time %f' % (TF, variant_label, train_time))

        # Shared stem for the model path and both result files.
        run_name = query_cell + "_" + TF + "_" + variant
        model_path = os.path.join(tf_model_dir, run_name)
        model.save(model_path)

        model_results = model.test(test_iterations, calculate_metrics=True)

        # Built directly from a dict: DataFrame.append is deprecated in
        # pandas 1.4 and removed in 2.0.
        eval_results_df = pd.DataFrame([{
            'predicted_transcription_factor': TF,
            'query_cell': query_cell,
            'auROC': model_results['auROC'],
            'auPRC': model_results['auPRC'],
            'trained_transcription_factors': TF,
            'iterations_trained': train_iterations,
            'iterations_tested': test_iterations,
            'train_time': train_time,
        }], columns=eval_columns)
        eval_results_df.to_csv(os.path.join(tf_results_dir, run_name + '.csv'),
                               sep="\t")

        preds_file = os.path.join(tf_results_dir, run_name + '.npz')
        np.savez_compressed(preds_file, pred=model_results['preds_mean'].numpy())

Training JUND...
using ['T47D', 'SK-N-SH', 'MCF-7', 'K562', 'HepG2', 'HeLa-S3', 'HCT116', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.TRAIN
using ['T47D', 'SK-N-SH', 'MCF-7', 'K562', 'HepG2', 'HeLa-S3', 'HCT116', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(67.876144, shape=(), dtype=float32)tf.Tensor(28.68024, shape=(), dtype=float32)tf.Tensor(39.195908, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(39.79464, shape=(), dtype=float32)tf.Tensor(13.760158, shape=(), dtype=float32)tf.Tensor(26.034485, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(34.20086, shape=(), dtype=float32)tf.Tensor(17.108875, shape=(), dtype=float32)tf.Tensor(17.091986, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(32.006634, shape=(), dtype=float32)tf.Tensor(19.966705, shape=(), dtype=float32)tf.Tensor(12.039928, shape=(), dtype=float32)
INFO:tensorflow:4000 tf.Tensor(25.870155, shape=(), dtype=float32

0it [00:00, ?it/s]

JUND Motif Train Time 1687.140229


157it [00:40,  3.89it/s]

INFO:tensorflow:macro auROC:     0.9284678551855514
INFO:tensorflow:auPRC:     0.012592098172470794
INFO:tensorflow:GINI:     0.7503683369564035





using ['T47D', 'SK-N-SH', 'MCF-7', 'K562', 'HepG2', 'HeLa-S3', 'HCT116', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.TRAIN
using ['T47D', 'SK-N-SH', 'MCF-7', 'K562', 'HepG2', 'HeLa-S3', 'HCT116', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(57.69705, shape=(), dtype=float32)tf.Tensor(31.45293, shape=(), dtype=float32)tf.Tensor(26.24412, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(35.459976, shape=(), dtype=float32)tf.Tensor(17.961977, shape=(), dtype=float32)tf.Tensor(17.498, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(32.189484, shape=(), dtype=float32)tf.Tensor(20.608858, shape=(), dtype=float32)tf.Tensor(11.580624, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(33.249386, shape=(), dtype=float32)tf.Tensor(24.942856, shape=(), dtype=float32)tf.Tensor(8.306529, shape=(), dtype=float32)
INFO:tensorflow:4000 tf.Tensor(25.370712, shape=(), dtype=float32)tf.Tensor(18.799538,

0it [00:00, ?it/s]

JUND Non-Motif Train Time 1690.779980


157it [00:40,  3.84it/s]

INFO:tensorflow:macro auROC:     0.8755799805444708
INFO:tensorflow:auPRC:     0.008243270584466236
INFO:tensorflow:GINI:     0.5701085785925561





Training TAF1...
using ['SK-N-SH', 'K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12892', 'GM12891', 'GM12878', 'A549'] as labels for mode Dataset.TRAIN
using ['SK-N-SH', 'K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12892', 'GM12891', 'GM12878', 'A549'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(68.0641, shape=(), dtype=float32)tf.Tensor(44.188812, shape=(), dtype=float32)tf.Tensor(23.875292, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(33.54499, shape=(), dtype=float32)tf.Tensor(17.74875, shape=(), dtype=float32)tf.Tensor(15.796238, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(11.015478, shape=(), dtype=float32)tf.Tensor(0.55997014, shape=(), dtype=float32)tf.Tensor(10.455508, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(7.5051203, shape=(), dtype=float32)tf.Tensor(0.36048096, shape=(), dtype=float32)tf.Tensor(7.1446395, shape=(), dtype=float32)
INFO:tensorflow:4000 tf.Tensor(6.2515893, shape=(), dtype=float32)tf.Tenso

0it [00:00, ?it/s]

TAF1 Motif Train Time 1635.290672


157it [00:38,  4.08it/s]

INFO:tensorflow:macro auROC:     0.9861794511884747
INFO:tensorflow:auPRC:     0.5499812401502298
INFO:tensorflow:GINI:     0.9723588948453479





using ['SK-N-SH', 'K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12892', 'GM12891', 'GM12878', 'A549'] as labels for mode Dataset.TRAIN
using ['SK-N-SH', 'K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12892', 'GM12891', 'GM12878', 'A549'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(65.98341, shape=(), dtype=float32)tf.Tensor(43.88282, shape=(), dtype=float32)tf.Tensor(22.100597, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(15.364998, shape=(), dtype=float32)tf.Tensor(0.7100724, shape=(), dtype=float32)tf.Tensor(14.654925, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(10.499827, shape=(), dtype=float32)tf.Tensor(0.8507942, shape=(), dtype=float32)tf.Tensor(9.649034, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(6.849214, shape=(), dtype=float32)tf.Tensor(0.28513256, shape=(), dtype=float32)tf.Tensor(6.5640817, shape=(), dtype=float32)
INFO:tensorflow:4000 tf.Tensor(21.0724, shape=(), dtype=float32)tf.Tensor(16.057003, shape=(

0it [00:00, ?it/s]

TAF1 Non-Motif Train Time 1624.671781


157it [00:39,  4.02it/s]

INFO:tensorflow:macro auROC:     0.989302956918758
INFO:tensorflow:auPRC:     0.5992221783919084
INFO:tensorflow:GINI:     0.9786058987743133





Training E2F1...
using ['K562', 'HeLa-S3'] as labels for mode Dataset.TRAIN
using ['K562', 'HeLa-S3'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(44.86111, shape=(), dtype=float32)tf.Tensor(39.538765, shape=(), dtype=float32)tf.Tensor(5.322346, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(13.230371, shape=(), dtype=float32)tf.Tensor(9.392074, shape=(), dtype=float32)tf.Tensor(3.838298, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(4.413699, shape=(), dtype=float32)tf.Tensor(1.5463245, shape=(), dtype=float32)tf.Tensor(2.8673744, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(7.744392, shape=(), dtype=float32)tf.Tensor(5.3292136, shape=(), dtype=float32)tf.Tensor(2.4151783, shape=(), dtype=float32)
INFO:tensorflow:4000 tf.Tensor(3.4688468, shape=(), dtype=float32)tf.Tensor(1.2941073, shape=(), dtype=float32)tf.Tensor(2.1747394, shape=(), dtype=float32)


0it [00:00, ?it/s]

E2F1 Motif Train Time 688.988659


157it [00:18,  8.72it/s]

INFO:tensorflow:macro auROC:     0.855065130260521
INFO:tensorflow:auPRC:     0.008199452897898531
INFO:tensorflow:GINI:     0.7101302213802605





using ['K562', 'HeLa-S3'] as labels for mode Dataset.TRAIN
using ['K562', 'HeLa-S3'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(45.836567, shape=(), dtype=float32)tf.Tensor(40.940758, shape=(), dtype=float32)tf.Tensor(4.895809, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(14.435993, shape=(), dtype=float32)tf.Tensor(10.822944, shape=(), dtype=float32)tf.Tensor(3.6130495, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(4.0258665, shape=(), dtype=float32)tf.Tensor(1.2343924, shape=(), dtype=float32)tf.Tensor(2.7914739, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(4.4870615, shape=(), dtype=float32)tf.Tensor(2.1164548, shape=(), dtype=float32)tf.Tensor(2.3706064, shape=(), dtype=float32)
INFO:tensorflow:4000 tf.Tensor(5.741802, shape=(), dtype=float32)tf.Tensor(3.6087534, shape=(), dtype=float32)tf.Tensor(2.1330488, shape=(), dtype=float32)


0it [00:00, ?it/s]

E2F1 Non-Motif Train Time 694.783370


157it [00:17,  8.81it/s]

INFO:tensorflow:macro auROC:     0.838056112224449
INFO:tensorflow:auPRC:     0.009855910209045296
INFO:tensorflow:GINI:     0.6761121853081162





Training GABPA...
using ['K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.TRAIN
using ['K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(60.636604, shape=(), dtype=float32)tf.Tensor(44.75963, shape=(), dtype=float32)tf.Tensor(15.876978, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(11.765964, shape=(), dtype=float32)tf.Tensor(0.9757925, shape=(), dtype=float32)tf.Tensor(10.790171, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(11.732292, shape=(), dtype=float32)tf.Tensor(4.286315, shape=(), dtype=float32)tf.Tensor(7.445977, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(27.555895, shape=(), dtype=float32)tf.Tensor(22.123789, shape=(), dtype=float32)tf.Tensor(5.4321055, shape=(), dtype=float32)
INFO:tensorflow:4000 tf.Tensor(4.5864677, shape=(), dtype=float32)tf.Tensor(0.3036614, shape=(), dtype=float32)tf.Tensor(4.2828064, shape=()

0it [00:00, ?it/s]

GABPA Motif Train Time 1229.998796


157it [00:30,  5.18it/s]


INFO:tensorflow:macro auROC:     0.8138274077783342
INFO:tensorflow:auPRC:     0.018779216425572485
INFO:tensorflow:GINI:     0.627654861198047
using ['K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.TRAIN
using ['K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(60.26727, shape=(), dtype=float32)tf.Tensor(45.584015, shape=(), dtype=float32)tf.Tensor(14.683255, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(10.760666, shape=(), dtype=float32)tf.Tensor(0.6522289, shape=(), dtype=float32)tf.Tensor(10.108437, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(7.823071, shape=(), dtype=float32)tf.Tensor(0.70869935, shape=(), dtype=float32)tf.Tensor(7.114372, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(29.458742, shape=(), dtype=float32)tf.Tensor(24.148067, shape=(), dtype=float32)tf.Tensor(5.310674, shape=(), dtype=float32)
INFO:tensorflo

0it [00:00, ?it/s]

GABPA Non-Motif Train Time 1242.608600


157it [00:29,  5.27it/s]

INFO:tensorflow:macro auROC:     0.8204707060590887
INFO:tensorflow:auPRC:     0.0214142408314604
INFO:tensorflow:GINI:     0.6409414414590636





Training FOXA1...
using ['T47D', 'MCF-7', 'K562', 'Ishikawa', 'HepG2', 'HEK293T', 'A549'] as labels for mode Dataset.TRAIN
using ['T47D', 'MCF-7', 'K562', 'Ishikawa', 'HepG2', 'HEK293T', 'A549'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(65.80141, shape=(), dtype=float32)tf.Tensor(47.278755, shape=(), dtype=float32)tf.Tensor(18.522655, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(17.53945, shape=(), dtype=float32)tf.Tensor(5.440012, shape=(), dtype=float32)tf.Tensor(12.099438, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(14.157874, shape=(), dtype=float32)tf.Tensor(6.7226853, shape=(), dtype=float32)tf.Tensor(7.435189, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(23.129192, shape=(), dtype=float32)tf.Tensor(18.011246, shape=(), dtype=float32)tf.Tensor(5.1179457, shape=(), dtype=float32)
INFO:tensorflow:4000 tf.Tensor(22.354486, shape=(), dtype=float32)tf.Tensor(18.340899, shape=(), dtype=float32)tf.Ten

0it [00:00, ?it/s]

FOXA1 Motif Train Time 1373.143633


157it [00:33,  4.75it/s]


INFO:tensorflow:macro auROC:     0.7872826433402982
INFO:tensorflow:auPRC:     0.10814941758198923
INFO:tensorflow:GINI:     0.574565289562638
using ['T47D', 'MCF-7', 'K562', 'Ishikawa', 'HepG2', 'HEK293T', 'A549'] as labels for mode Dataset.TRAIN
using ['T47D', 'MCF-7', 'K562', 'Ishikawa', 'HepG2', 'HEK293T', 'A549'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(59.749985, shape=(), dtype=float32)tf.Tensor(42.55362, shape=(), dtype=float32)tf.Tensor(17.196365, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(16.04166, shape=(), dtype=float32)tf.Tensor(4.834282, shape=(), dtype=float32)tf.Tensor(11.207379, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(19.55907, shape=(), dtype=float32)tf.Tensor(12.650824, shape=(), dtype=float32)tf.Tensor(6.908246, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(11.73516, shape=(), dtype=float32)tf.Tensor(6.942775, shape=(), dtype=float32)tf.Tensor(4.792385, shape=(), dtype=float

0it [00:00, ?it/s]

FOXA1 Non-Motif Train Time 1378.620798


157it [00:32,  4.81it/s]

INFO:tensorflow:macro auROC:     0.7866097673265573
INFO:tensorflow:auPRC:     0.11313550948607104
INFO:tensorflow:GINI:     0.5732195519453642





Training REST...
using ['SK-N-SH', 'K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.TRAIN
using ['SK-N-SH', 'K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(68.20203, shape=(), dtype=float32)tf.Tensor(49.606087, shape=(), dtype=float32)tf.Tensor(18.595943, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(13.574488, shape=(), dtype=float32)tf.Tensor(1.060781, shape=(), dtype=float32)tf.Tensor(12.513706, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(20.345335, shape=(), dtype=float32)tf.Tensor(11.98852, shape=(), dtype=float32)tf.Tensor(8.356815, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(21.086687, shape=(), dtype=float32)tf.Tensor(15.026542, shape=(), dtype=float32)tf.Tensor(6.0601454, shape=(), dtype=float32)
INFO:tensorflow:4000 tf.Tensor(33.946106, shape=(), dtype=float32)tf.Tensor(29.082403, shape=(), dtype=float32)tf.Tensor

0it [00:00, ?it/s]

REST Motif Train Time 1371.555836


157it [00:32,  4.79it/s]

INFO:tensorflow:macro auROC:     0.7971319695146409
INFO:tensorflow:auPRC:     0.05331553258418627
INFO:tensorflow:GINI:     0.5942638900640543





using ['SK-N-SH', 'K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.TRAIN
using ['SK-N-SH', 'K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(66.57971, shape=(), dtype=float32)tf.Tensor(49.43197, shape=(), dtype=float32)tf.Tensor(17.147745, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(13.869762, shape=(), dtype=float32)tf.Tensor(2.3822074, shape=(), dtype=float32)tf.Tensor(11.4875555, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(8.842641, shape=(), dtype=float32)tf.Tensor(1.098128, shape=(), dtype=float32)tf.Tensor(7.744513, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(6.6615205, shape=(), dtype=float32)tf.Tensor(0.91025835, shape=(), dtype=float32)tf.Tensor(5.751262, shape=(), dtype=float32)
INFO:tensorflow:4000 tf.Tensor(13.341753, shape=(), dtype=float32)tf.Tensor(8.624917, shape=(), dtype=float32)tf.Tensor(4.716836, shape=(

0it [00:00, ?it/s]

REST Non-Motif Train Time 1373.982441


157it [00:32,  4.84it/s]


INFO:tensorflow:macro auROC:     0.8005522606154376
INFO:tensorflow:auPRC:     0.09675764243203298
INFO:tensorflow:GINI:     0.6011045771911352
Training MAX...
using ['K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.TRAIN
using ['K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(62.72473, shape=(), dtype=float32)tf.Tensor(46.8216, shape=(), dtype=float32)tf.Tensor(15.90313, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(21.053509, shape=(), dtype=float32)tf.Tensor(10.350472, shape=(), dtype=float32)tf.Tensor(10.703036, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(7.6883082, shape=(), dtype=float32)tf.Tensor(0.56106055, shape=(), dtype=float32)tf.Tensor(7.127248, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(5.913164, shape=(), dtype=float32)tf.Tensor(0.6911288, shape=(), dtype=float32)tf.Tensor(5.2220354, shape=(), dtype=float32)


0it [00:00, ?it/s]

MAX Motif Train Time 1240.037551


157it [00:30,  5.23it/s]

INFO:tensorflow:macro auROC:     0.827372239546872
INFO:tensorflow:auPRC:     0.1489887172820554
INFO:tensorflow:GINI:     0.6547444661458334





using ['K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.TRAIN
using ['K562', 'HepG2', 'HeLa-S3', 'H1', 'GM12878', 'A549'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(59.076626, shape=(), dtype=float32)tf.Tensor(44.361847, shape=(), dtype=float32)tf.Tensor(14.714779, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(11.081548, shape=(), dtype=float32)tf.Tensor(1.2275722, shape=(), dtype=float32)tf.Tensor(9.853975, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(7.1148415, shape=(), dtype=float32)tf.Tensor(0.5644128, shape=(), dtype=float32)tf.Tensor(6.550429, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(9.76866, shape=(), dtype=float32)tf.Tensor(4.9072995, shape=(), dtype=float32)tf.Tensor(4.86136, shape=(), dtype=float32)
INFO:tensorflow:4000 tf.Tensor(31.569555, shape=(), dtype=float32)tf.Tensor(27.602213, shape=(), dtype=float32)tf.Tensor(3.967343, shape=(), dtype=float32)


0it [00:00, ?it/s]

MAX Non-Motif Train Time 1234.595524


157it [00:29,  5.26it/s]


INFO:tensorflow:macro auROC:     0.8408231506820254
INFO:tensorflow:auPRC:     0.15878261920286174
INFO:tensorflow:GINI:     0.6816463324390367
Training EGR1...
using ['K562', 'H1', 'GM12878'] as labels for mode Dataset.TRAIN
using ['K562', 'H1', 'GM12878'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(53.79943, shape=(), dtype=float32)tf.Tensor(45.84763, shape=(), dtype=float32)tf.Tensor(7.9518027, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(8.290697, shape=(), dtype=float32)tf.Tensor(2.5742066, shape=(), dtype=float32)tf.Tensor(5.7164907, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(10.526819, shape=(), dtype=float32)tf.Tensor(6.291806, shape=(), dtype=float32)tf.Tensor(4.2350135, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(3.9078176, shape=(), dtype=float32)tf.Tensor(0.47180915, shape=(), dtype=float32)tf.Tensor(3.4360085, shape=(), dtype=float32)
INFO:tensorflow:4000 tf.Tensor(6.499288, shape=(), dt

0it [00:00, ?it/s]

EGR1 Motif Train Time 830.610713


157it [00:21,  7.38it/s]

INFO:tensorflow:macro auROC:     0.8680971210042676
INFO:tensorflow:auPRC:     0.2833736567983777
INFO:tensorflow:GINI:     0.7361942438201395





using ['K562', 'H1', 'GM12878'] as labels for mode Dataset.TRAIN
using ['K562', 'H1', 'GM12878'] as labels for mode Dataset.VALID
INFO:tensorflow:Starting Training
INFO:tensorflow:0 tf.Tensor(50.697414, shape=(), dtype=float32)tf.Tensor(43.30559, shape=(), dtype=float32)tf.Tensor(7.391823, shape=(), dtype=float32)
INFO:tensorflow:1000 tf.Tensor(24.07993, shape=(), dtype=float32)tf.Tensor(18.815435, shape=(), dtype=float32)tf.Tensor(5.264493, shape=(), dtype=float32)
INFO:tensorflow:2000 tf.Tensor(8.728676, shape=(), dtype=float32)tf.Tensor(4.8418174, shape=(), dtype=float32)tf.Tensor(3.886859, shape=(), dtype=float32)
INFO:tensorflow:3000 tf.Tensor(3.5481765, shape=(), dtype=float32)tf.Tensor(0.42921558, shape=(), dtype=float32)tf.Tensor(3.1189609, shape=(), dtype=float32)
INFO:tensorflow:4000 tf.Tensor(11.482698, shape=(), dtype=float32)tf.Tensor(8.733459, shape=(), dtype=float32)tf.Tensor(2.749239, shape=(), dtype=float32)


0it [00:00, ?it/s]

EGR1 Non-Motif Train Time 822.888214


157it [00:21,  7.38it/s]

INFO:tensorflow:macro auROC:     0.866631605601979
INFO:tensorflow:auPRC:     0.28948371621693886
INFO:tensorflow:GINI:     0.7332631740660701



