### Set up the environment and imports

In [82]:
# Set HDF5_USE_FILE_LOCKING=FALSE in the kernel's environment.
# NOTE(review): presumably this avoids HDF5 file-locking errors on the
# filesystem that holds the data/results -- confirm it is still required.
%env HDF5_USE_FILE_LOCKING=FALSE

env: HDF5_USE_FILE_LOCKING=FALSE


In [83]:
import bpnet
from bpnet.cli.contrib import ContribFile
from bpnet.plot.tracks import plot_tracks, to_neg

import os
import uuid
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import clear_output, HTML
from pathlib import Path
import pandas as pd
import numpy as np
clear_output()

#### Optional: Setup wandb

In [84]:
# Optional: start a Weights & Biases run from the notebook kernel itself, in
# project 'bpnet-training' under entity 'an1lam'.
# NOTE(review): the `bpnet train` subprocesses launched further down log
# "Running wandb.init()" on their own, so this kernel-level run looks separate
# from the per-replicate training runs -- confirm it is actually needed.
# NOTE(review): this import lives outside the notebook's main import cell.
import wandb

wandb.init(project='bpnet-training', entity='an1lam')

2020-09-02 16:18:47,859 [INFO] system metrics and metadata threads started
2020-09-02 16:18:47,860 [INFO] checking resume status, waiting at most 10 seconds
2020-09-02 16:18:47,942 [INFO] resuming run from id: UnVuOnYxOjJ4d2M5aDdpOmJwbmV0LXRyYWluaW5nOmFuMWxhbQ==
2020-09-02 16:18:47,948 [INFO] upserting run before process can begin, waiting at most 10 seconds
2020-09-02 16:18:48,058 [INFO] saving pip packages
2020-09-02 16:18:48,060 [INFO] initializing streaming files api
2020-09-02 16:18:48,061 [INFO] unblocking file change observer, beginning sync with W&B servers


W&B Run: https://app.wandb.ai/an1lam/bpnet-training/runs/2xwc9h7i

2020-09-02 16:18:48,066 [INFO] shutting down system stats and metadata service
2020-09-02 16:18:48,784 [INFO] file/dir modified: /home/stephenmalina/project/src/wandb/run-20200902_161847-2xwc9h7i/config.yaml
2020-09-02 16:18:48,861 [INFO] stopping streaming files and file change observer
2020-09-02 16:18:48,873 [INFO] file/dir created: /home/stephenmalina/project/src/wandb/run-20200902_161847-2xwc9h7i/wandb-summary.json
2020-09-02 16:18:48,875 [INFO] file/dir created: /home/stephenmalina/project/src/wandb/run-20200902_161847-2xwc9h7i/wandb-metadata.json
2020-09-02 16:18:48,880 [INFO] file/dir created: /home/stephenmalina/project/src/wandb/run-20200902_161847-2xwc9h7i/wandb-history.jsonl
2020-09-02 16:18:48,882 [INFO] file/dir created: /home/stephenmalina/project/src/wandb/run-20200902_161847-2xwc9h7i/requirements.txt
2020-09-02 16:18:48,884 [INFO] file/dir created: /home/stephenmalina/project/src/wandb/run-20200902_161847-2xwc9h7i/wandb-events.jsonl
2020-09-02 16:18:48,891 [INFO] file/

In [85]:
# --- Experiment settings -----------------------------------------------------
# How many times the training loop below is repeated (one model per repeat).
n_reps = 5

# --- Input configuration -----------------------------------------------------
# Directory holding the gin model config and the dataspec, plus their file names.
config_dir = Path('./bpnet/')
model_config_fname = 'ChIP-nexus-default.gin'
data_config_fname = 'ChIP-nexus.dataspec.yml'

# --- Output location ---------------------------------------------------------
# Every execution of this cell yields a fresh, timestamped results directory.
timestamp = format(datetime.now(), '%Y-%m-%d-%H-%M-%S')
output_dir = f'/home/stephenmalina/project/dat/res-bpnet-training-{timestamp}'
output_dir

'/home/stephenmalina/project/dat/res-bpnet-training-2020-09-02-16-18-50'

In [86]:
# Make sure the run's output directory and its 'output_ensemble' sub-directory
# both exist; directories that are already present are left untouched.
for required_dir in (output_dir, os.path.join(output_dir, 'output_ensemble')):
    os.makedirs(required_dir, exist_ok=True)

In [87]:
# Print the dataspec YAML, which lists each task's count tracks and peak file.
!cat {config_dir}/{data_config_fname}

task_specs:
  Oct4:
    tracks:
    - /home/stephenmalina/project/dat/bpnet-manuscript-data/data/chip-nexus/Oct4/counts.pos.bw
    - /home/stephenmalina/project/dat/bpnet-manuscript-data/data/chip-nexus/Oct4/counts.neg.bw
    peaks: /home/stephenmalina/project/dat/bpnet-manuscript-data/data/chip-nexus/Oct4/idr-optimal-set.summit.bed.gz
  Sox2:
    tracks:
    - /home/stephenmalina/project/dat/bpnet-manuscript-data/data/chip-nexus/Sox2/counts.pos.bw
    - /home/stephenmalina/project/dat/bpnet-manuscript-data/data/chip-nexus/Sox2/counts.neg.bw
    peaks: /home/stephenmalina/project/dat/bpnet-manuscript-data/data/chip-nexus/Sox2/idr-optimal-set.summit.bed.gz
  Nanog:
    tracks:
    - /home/stephenmalina/project/dat/bpnet-manuscript-data/data/chip-nexus/Nanog/counts.pos.bw
    - /home/stephenmalina/project/dat/bpnet-manuscript-data/data/chip-nexus/Nanog/counts.neg.bw
    peaks: /home/stephenmalina/project/dat/bpnet-manuscript-data/data/chip-nexus/Nanog/idr-optimal-set.summi

### Data stats

In [88]:
# Chromosome names present in the Sox2 peak set: take column 1 of the gzipped
# summit BED file and de-duplicate it.
!zcat /home/stephenmalina/project/dat/bpnet-manuscript-data/data/chip-nexus/Sox2/idr-optimal-set.summit.bed.gz \
    | cut -f 1 | sort -u

chr1
chr10
chr11
chr12
chr13
chr14
chr15
chr16
chr17
chr18
chr19
chr2
chr3
chr4
chr5
chr6
chr7
chr8
chr9
chrX
chrY


Each task (or TF) can specify a set of peaks associated with it. Here is the number of peaks per TF that we will use in this tutorial:

In [89]:
tasks = ['Oct4', 'Sox2', 'Nanog', 'Klf4']

# number of peaks per task
for task in tasks:
    print(task)
    data_dir = '/home/stephenmalina/project/dat/bpnet-manuscript-data'
    !zcat {data_dir}/data/chip-nexus/{task}/idr-optimal-set.summit.bed.gz | wc -l

Oct4
25849
Sox2
10999
Nanog
56459
Klf4
57601


## 2. Train the model

Having specified `dataspec.yml`, we are now ready to train the model with 

```
bpnet train <dataspec.yml> <output dir> [optional flags]
```


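We will use a pre-made model [bpnet9](../bpnet/premade/bpnet9.gin) as a starting point and modify a few parameters specified in the config.gin file. Specifically, with the configuration printed below (and echoed as "Used config" in the training log), we will
- train a single model on all four tasks (`Oct4`, `Sox2`, `Nanog`, `Klf4`)
- use chromosomes 2-4 as the validation set (`valid_chr`) and chromosomes 1, 8 and 9 as the test set (`test_chr`), and exclude chrX and chrY (`exclude_chr`)
- use 9 layers of dilated convolutions (`n_dil_layers = 9`)
- use an input sequence width of 1000 bp (`seq_width = 1000`)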

In [90]:
# Print the gin file whose settings are passed to `bpnet train` via --config.
!cat {config_dir}/{model_config_fname} 
# NOTE: test_chr will be also excluded similar to 'exclude_chr'

b_loss_weight = 0
c_loss_weight = 10
p_loss_weight = 1
filters = 64
tconv_kernel_size = 25
lr = 0.004
n_dil_layers = 9
train.batch_size = 128
merge_profile_reg = False
dataspec = 'ChIP-nexus.dataspec.yml'

batchnorm = False

padding = 'same'
seq_width = 1000

tasks = ['Oct4', 'Sox2', 'Nanog', 'Klf4']


Have a look at the original gin file of bpnet9 here: https://github.com/kundajelab/bpnet/blob/master/bpnet/premade/bpnet9-ginspec.gin. For more information on using gin files see <https://github.com/google/gin-config>. 

To track model training and evaluation, we will use [wandb](http://wandb.com/) by adding `--wandb=an1lam/bpnet-training` to `bpnet train`. You can navigate to https://app.wandb.ai/an1lam/bpnet-training to see the training progress.

Let's train!

In [91]:
# Path of ensemble member 0 (the `output_ensemble/0` entry that the training
# loop symlinks to the first run's directory).
# NOTE(review): presumably later cells use it as the example model -- confirm.
example_model_dir = os.path.join(output_dir, 'output_ensemble', '0')

In [92]:
# Train `n_reps` models for at most 10 epochs each (train.epochs=10), passing
# the replicate index as the training seed (train.seed={i}).
# NOTE(review): the exit status of `bpnet train` is never checked, so the
# symlink below is created even if a training run fails.
for i in range(n_reps):
    # setup a new run_id (could be done automatically, but then the output directory would change)
    # The id is a timestamp plus a UUID4; the run writes to {output_dir}/{run_id}.
    run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + "_" + str(uuid.uuid4())
    # cd into config_dir first: the dataspec and gin file are passed by bare file name.
    !cd {config_dir} && bpnet train {data_config_fname} --premade=bpnet9 \
        --config={model_config_fname} {output_dir} \
        --run-id '{run_id}' --wandb=an1lam/bpnet-training \
        --override='train.epochs=10; train.seed={i}'
    # softlink the new output directory
    # Any existing output_ensemble/{i} entry is removed first; `ln -srf` then
    # creates a relative symbolic link, overwriting if necessary.
    !rm -rf {output_dir}/output_ensemble/{i} && ln -srf {output_dir}/{run_id} {output_dir}/output_ensemble/{i}

Using TensorFlow backend.
2020-09-02 16:18:52.941582: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0


2020-09-02 16:18:54,763 [INFO] NumExpr defaulting to 8 threads.
The mpl_toolkits.axes_grid1.colorbar module was deprecated in Matplotlib 3.2 and will be removed two minor releases later. Use matplotlib.colorbar instead.
  from mpl_toolkits.axes_grid1.colorbar import colorbar
INFO [09-02 16:18:56] Using wandb. Running wandb.init()
wandb: Tracking run with wandb version 0.9.6
wandb: Run data is saved locally in ../../dat/res-bpnet-training-2020-09-02-16-18-50/run-20200902_161856-2020-09-02_16-18-51_d4fb7dfe-59f4-4bfb-9f90-b418a36664ac
wandb: Syncing run 2020-09-02_16-18-51_d4fb7dfe-59f4-4bfb-9f90-b418a36664ac
wandb: ⭐️ View project at https://app.wandb.ai/an1lam/bpnet-training
wandb: 🚀 View run at https://app.wandb.ai/an1lam/bpnet-training/runs/2020-09-02_16-18-51_d4fb7dfe-59f4-4bfb-9f90-b418a36664ac
wandb: Run `wand

Epoch 1/10
2020-09-02 16:19:10.195726: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2020-09-02 16:19:10.423217: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10


Epoch 9/10
Epoch 10/10
INFO [09-02 16:52:00] Evaluating dataset: valid-peaks
229it [00:37,  6.15it/s]                                                        
  fracs = yt / yt.sum(axis=1, keepdims=True)
  is_peak = (fracs >= pos_min_threshold).astype(float)
  ambigous = (fracs < pos_min_threshold) & (fracs >= neg_max_threshold)
  ambigous = (fracs < pos_min_threshold) & (fracs >= neg_max_threshold)
INFO [09-02 16:54:09] Evaluating dataset: train-peaks
711it [01:52,  6.32it/s]                                                        
INFO [09-02 17:01:02] Saved metrics to /home/stephenmalina/project/dat/res-bpnet-training-2020-09-02-16-18-50/2020-09-02_16-18-51_d4fb7dfe-59f4-4bfb-9f90-b418a36664ac/evaluation.valid.json
INFO [09-02 17:01:02] Done!
----------------------------------------
Final metrics: 
{
  "valid-peaks": {
    "Oct4/profile/binsize=1/auprc": 0.174503907210406,
    "Oct4/profile/binsize=1/random_auprc": 0.002968693730871131,
    "Oct4/profile/binsize=1/n_positives": 49840,

INFO [09-02 17:01:02] Running the evaluation report
Executing:   3%|█                              | 1/29 [00:01<00:35,  1.25s/cell]2020-09-02 17:01:05.260033: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
Executing:  10%|███▏                           | 3/29 [00:03<00:33,  1.28s/cell]2020-09-02 17:01:06.817030: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2200000000 Hz
2020-09-02 17:01:06.817825: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55dfaa22cc30 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-09-02 17:01:06.817857: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2020-09-02 17:01:06.820802: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-09-02 17:01:06.888152: I tensorflow/stream_executor/cuda/cu

wandb: Waiting for W&B process to finish, PID 4325
 'Sox2/profile/binsize=10/imbalance': 0.05612442985432679, 'Sox2/counts/mse': 0.25067776, 'Sox2/counts/var_explained': 0.3330099582672119, 'Sox2/counts/pearsonr': 0.5771318501153297, 'Sox2/counts/spearmanr': 0.5396412516368724, 'Sox2/counts/mad': 0.40291572, 'Nanog/profile/binsize=1/auprc': 0.45204433154678675, 'Nanog/profile/binsize=1/random_auprc': 0.005396128595434531, 'Nanog/profile/binsize=1/n_positives': 411441, 'Nanog/profile/binsize=1/frac_ambigous': 0.058152092850969256, 'Nanog/profile/binsize=1/imbalance': 0.005393806853993239, 'Nanog/profile/binsize=10/auprc': 0.7266661488124783, 'Nanog/profile/binsize=10/random_auprc': 0.040264529325654864, 'Nanog/profile/binsize=10/n_positives': 247668, 'Nanog/profile/binsize=10/frac_ambigous': 0.24060291393999259, 'Nanog/profile/binsize=10/imbalance': 0.04026888195270616, 'Nanog/counts/mse': 0.67656076, 'Nanog/counts/var_explained': 0.35717469453811646, 'Nanog/counts/pearsonr': 0.59867331

wandb:                          eval/valid-peaks/Sox2/counts/mse 0.257840096950531
wandb:                eval/valid-peaks/Sox2/counts/var_explained 0.34099650382995605
wandb:                     eval/valid-peaks/Sox2/counts/pearsonr 0.5840843933014306
wandb:                    eval/valid-peaks/Sox2/counts/spearmanr 0.5404238864721982
wandb:                          eval/valid-peaks/Sox2/counts/mad 0.4089759886264801
wandb:            eval/valid-peaks/Nanog/profile/binsize=1/auprc 0.4252083470656009
wandb:     eval/valid-peaks/Nanog/profile/binsize=1/random_auprc 0.005343811759237785
wandb:      eval/valid-peaks/Nanog/profile/binsize=1/n_positives 131142
wandb:    eval/valid-peaks/Nanog/profile/binsize=1/frac_ambigous 0.05829807838371563
wandb:        eval/valid-peaks/Nanog/profile/binsize=1/imbalance 0.0052886460179792675
wandb:           eval/valid-peaks/Nanog/profile/binsize=10/auprc 0.7013914968555809
wandb:    eval/valid-peaks/Nanog/profile/binsize=10/random_auprc 0.039938396082472

wandb:         eval/train-peaks/Klf4/profile/binsize=1/imbalance 0.002606649279519673
wandb:            eval/train-peaks/Klf4/profile/binsize=10/auprc 0.5087873394331355
wandb:     eval/train-peaks/Klf4/profile/binsize=10/random_auprc 0.027420295513022573
wandb:      eval/train-peaks/Klf4/profile/binsize=10/n_positives 150069
wandb:    eval/train-peaks/Klf4/profile/binsize=10/frac_ambigous 0.31857240662322184
wandb:        eval/train-peaks/Klf4/profile/binsize=10/imbalance 0.027921413915439248
wandb:                          eval/train-peaks/Klf4/counts/mse 0.5285587310791016
wandb:                eval/train-peaks/Klf4/counts/var_explained 0.38523656129837036
wandb:                     eval/train-peaks/Klf4/counts/pearsonr 0.6237245253056076
wandb:                    eval/train-peaks/Klf4/counts/spearmanr 0.6162512945104137
wandb:                          eval/train-peaks/Klf4/counts/mad 0.6056191921234131
wandb:              eval/train-peaks/avg/profile/binsize=1/auprc 0.3153496695015

2020-09-02 17:03:09.663571: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1325] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 5148 MB memory) -> physical GPU (device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7)
INFO [09-02 17:03:09] Using the following premade configuration: bpnet9
INFO [09-02 17:03:09] Using the following config.gin files: ChIP-nexus-default.gin
TF-MoDISco is using the TensorFlow backend.
Used config: ----------------------------------------
import bpnet
import bpnet.configurables
import bpnet.datasets
import bpnet.heads
import bpnet.layers
import bpnet.losses
import bpnet.metrics
import bpnet.models
import bpnet.seqmodel
import bpnet.trainers

# Macros:
augment_interval = True
batchnorm = False
dataspec = 'ChIP-nexus.dataspec.yml'
exclude_chr = ['chrX', 'chrY']
filters = 64
lambda = 10
lr = 0.004
n_bias_tracks = 2
n_dil_layers = 9
seq_width = 1000
tasks = ['Oct4', 'Sox2', 'Nanog', 'Klf4']
tconv_kernel_siz

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10


Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


INFO [09-02 17:36:09] Evaluating dataset: valid-peaks
229it [00:37,  6.17it/s]                                                        
  fracs = yt / yt.sum(axis=1, keepdims=True)
  is_peak = (fracs >= pos_min_threshold).astype(float)
  ambigous = (fracs < pos_min_threshold) & (fracs >= neg_max_threshold)
  ambigous = (fracs < pos_min_threshold) & (fracs >= neg_max_threshold)
INFO [09-02 17:38:15] Evaluating dataset: train-peaks
711it [01:52,  6.32it/s]                                                        
INFO [09-02 17:45:06] Saved metrics to /home/stephenmalina/project/dat/res-bpnet-training-2020-09-02-16-18-50/2020-09-02_17-03-02_39a804db-4073-4f07-90cb-74e7e8cef50e/evaluation.valid.json
INFO [09-02 17:45:06] Done!
----------------------------------------
Final metrics: 
{
  "valid-peaks": {
    "Oct4/profile/binsize=1/auprc": 0.17523543113237355,
    "Oct4/profile/binsize=1/random_auprc": 0.0029255944118368807,
    "Oct4/profile/binsize=1/n_positives": 49809,
    "Oct4/profile/b

INFO [09-02 17:45:06] Running the evaluation report
Executing:   3%|█                              | 1/29 [00:01<00:34,  1.25s/cell]2020-09-02 17:45:09.266192: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
Executing:  10%|███▏                           | 3/29 [00:03<00:33,  1.28s/cell]2020-09-02 17:45:10.799745: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2200000000 Hz
2020-09-02 17:45:10.800420: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x562d00d02de0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-09-02 17:45:10.800453: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2020-09-02 17:45:10.803345: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-09-02 17:45:10.872121: I tensorflow/stream_executor/cuda/cu

wandb: Waiting for W&B process to finish, PID 4916
/profile/binsize=10/imbalance': 0.055914906978569746, 'Sox2/counts/mse': 0.21784697, 'Sox2/counts/var_explained': 0.32793182134628296, 'Sox2/counts/pearsonr': 0.5743149635097091, 'Sox2/counts/spearmanr': 0.5409065510892215, 'Sox2/counts/mad': 0.3427713, 'Nanog/profile/binsize=1/auprc': 0.4406191791791741, 'Nanog/profile/binsize=1/random_auprc': 0.005281582792955107, 'Nanog/profile/binsize=1/n_positives': 411621, 'Nanog/profile/binsize=1/frac_ambigous': 0.058150165546550704, 'Nanog/profile/binsize=1/imbalance': 0.005399222134558567, 'Nanog/profile/binsize=10/auprc': 0.7199142470171072, 'Nanog/profile/binsize=10/random_auprc': 0.03950073725624159, 'Nanog/profile/binsize=10/n_positives': 247756, 'Nanog/profile/binsize=10/frac_ambigous': 0.24055223364301245, 'Nanog/profile/binsize=10/imbalance': 0.04030339301615643, 'Nanog/counts/mse': 0.5680249, 'Nanog/counts/var_explained': 0.3541957139968872, 'Nanog/counts/pearsonr': 0.5958914079512381,

wandb:                          eval/valid-peaks/Sox2/counts/mse 0.2161509096622467
wandb:                eval/valid-peaks/Sox2/counts/var_explained 0.3349761962890625
wandb:                     eval/valid-peaks/Sox2/counts/pearsonr 0.5804694010695272
wandb:                    eval/valid-peaks/Sox2/counts/spearmanr 0.5426375512322759
wandb:                          eval/valid-peaks/Sox2/counts/mad 0.34232160449028015
wandb:            eval/valid-peaks/Nanog/profile/binsize=1/auprc 0.41345618211835095
wandb:     eval/valid-peaks/Nanog/profile/binsize=1/random_auprc 0.005260052867601725
wandb:      eval/valid-peaks/Nanog/profile/binsize=1/n_positives 130741
wandb:    eval/valid-peaks/Nanog/profile/binsize=1/frac_ambigous 0.05821099802521647
wandb:        eval/valid-peaks/Nanog/profile/binsize=1/imbalance 0.005271987131191554
wandb:           eval/valid-peaks/Nanog/profile/binsize=10/auprc 0.6942915111731189
wandb:    eval/valid-peaks/Nanog/profile/binsize=10/random_auprc 0.03948877663114

wandb:         eval/train-peaks/Klf4/profile/binsize=1/imbalance 0.0026027476763331382
wandb:            eval/train-peaks/Klf4/profile/binsize=10/auprc 0.5106468432261342
wandb:     eval/train-peaks/Klf4/profile/binsize=10/random_auprc 0.027699238269336554
wandb:      eval/train-peaks/Klf4/profile/binsize=10/n_positives 149305
wandb:    eval/train-peaks/Klf4/profile/binsize=10/frac_ambigous 0.31801553798031684
wandb:        eval/train-peaks/Klf4/profile/binsize=10/imbalance 0.02783705149682066
wandb:                          eval/train-peaks/Klf4/counts/mse 0.39439114928245544
wandb:                eval/train-peaks/Klf4/counts/var_explained 0.4010603427886963
wandb:                     eval/train-peaks/Klf4/counts/pearsonr 0.6363779550516231
wandb:                    eval/train-peaks/Klf4/counts/spearmanr 0.6317384976535204
wandb:                          eval/train-peaks/Klf4/counts/mad 0.5066742300987244
wandb:              eval/train-peaks/avg/profile/binsize=1/auprc 0.3098479501780

INFO [09-02 17:47:14] Using the following premade configuration: bpnet9
INFO [09-02 17:47:14] Using the following config.gin files: ChIP-nexus-default.gin
TF-MoDISco is using the TensorFlow backend.
Used config: ----------------------------------------
import bpnet
import bpnet.configurables
import bpnet.datasets
import bpnet.heads
import bpnet.layers
import bpnet.losses
import bpnet.metrics
import bpnet.models
import bpnet.seqmodel
import bpnet.trainers

# Macros:
augment_interval = True
batchnorm = False
dataspec = 'ChIP-nexus.dataspec.yml'
exclude_chr = ['chrX', 'chrY']
filters = 64
lambda = 10
lr = 0.004
n_bias_tracks = 2
n_dil_layers = 9
seq_width = 1000
tasks = ['Oct4', 'Sox2', 'Nanog', 'Klf4']
tconv_kernel_size = 25
test_chr = ['chr1', 'chr8', 'chr9']
tracks_per_task = 2
use_bias = True
valid_chr = ['chr2', 'chr3', 'chr4']

# Parameters for Adam:
Adam.amsgrad = False
Adam.beta_1 = 0.9
Adam.beta_2 = 0.999
Adam.decay = 0.0
Adam.epsilon = None
Adam.lr = %lr

# Parameters for bpnet_

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10


Epoch 8/10
Epoch 9/10
Epoch 10/10


INFO [09-02 18:20:25] Evaluating dataset: valid-peaks
229it [00:37,  6.17it/s]                                                        
  fracs = yt / yt.sum(axis=1, keepdims=True)
  is_peak = (fracs >= pos_min_threshold).astype(float)
  ambigous = (fracs < pos_min_threshold) & (fracs >= neg_max_threshold)
  ambigous = (fracs < pos_min_threshold) & (fracs >= neg_max_threshold)
INFO [09-02 18:22:31] Evaluating dataset: train-peaks
711it [01:54,  6.20it/s]                                                        
INFO [09-02 18:29:26] Saved metrics to /home/stephenmalina/project/dat/res-bpnet-training-2020-09-02-16-18-50/2020-09-02_17-47-07_e6bba07e-22c1-4421-a29f-4a43d62fbede/evaluation.valid.json
INFO [09-02 18:29:27] Done!
----------------------------------------
Final metrics: 
{
  "valid-peaks": {
    "Oct4/profile/binsize=1/auprc": 0.18088892149542748,
    "Oct4/profile/binsize=1/random_auprc": 0.002895367657491975,
    "Oct4/profile/binsize=1/n_positives": 49748,
    "Oct4/profile/bi

INFO [09-02 18:29:27] Running the evaluation report
Executing:   3%|█                              | 1/29 [00:01<00:36,  1.29s/cell]2020-09-02 18:29:30.017493: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
Executing:  10%|███▏                           | 3/29 [00:03<00:33,  1.31s/cell]2020-09-02 18:29:31.543522: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2200000000 Hz
2020-09-02 18:29:31.544234: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x56359714d850 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-09-02 18:29:31.544268: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2020-09-02 18:29:31.547291: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-09-02 18:29:31.614119: I tensorflow/stream_executor/cuda/cu

x2/profile/binsize=10/imbalance': 0.05609883324482376, 'Sox2/counts/mse': 0.2275805, 'Sox2/counts/var_explained': 0.3515484929084778, 'Sox2/counts/pearsonr': 0.5930045935914039, 'Sox2/counts/spearmanr': 0.5542465241101001, 'Sox2/counts/mad': 0.37702996, 'Nanog/profile/binsize=1/auprc': 0.43699234267786596, 'Nanog/profile/binsize=1/random_auprc': 0.0054454229766417285, 'Nanog/profile/binsize=1/n_positives': 410975, 'Nanog/profile/binsize=1/frac_ambigous': 0.05803734316612636, 'Nanog/profile/binsize=1/imbalance': 0.005398506385429147, 'Nanog/profile/binsize=10/auprc': 0.7190740552272019, 'Nanog/profile/binsize=10/random_auprc': 0.040468197847913986, 'Nanog/profile/binsize=10/n_positives': 247096, 'Nanog/profile/binsize=10/frac_ambigous': 0.24004590561508574, 'Nanog/profile/binsize=10/imbalance': 0.0402318735054252, 'Nanog/counts/mse': 0.58523697, 'Nanog/counts/var_explained': 0.37088334560394287, 'Nanog/counts/pearsonr': 0.6107402239081987, 'Nanog/counts/spearmanr': 0.617136210444347, 'N

wandb:                          eval/valid-peaks/Sox2/counts/mse 0.23585037887096405
wandb:                eval/valid-peaks/Sox2/counts/var_explained 0.34578680992126465
wandb:                     eval/valid-peaks/Sox2/counts/pearsonr 0.5884341670883255
wandb:                    eval/valid-peaks/Sox2/counts/spearmanr 0.5480583436990223
wandb:                          eval/valid-peaks/Sox2/counts/mad 0.3832675814628601
wandb:            eval/valid-peaks/Nanog/profile/binsize=1/auprc 0.4069695115202532
wandb:     eval/valid-peaks/Nanog/profile/binsize=1/random_auprc 0.005233572833043135
wandb:      eval/valid-peaks/Nanog/profile/binsize=1/n_positives 131431
wandb:    eval/valid-peaks/Nanog/profile/binsize=1/frac_ambigous 0.05855871441550575
wandb:        eval/valid-peaks/Nanog/profile/binsize=1/imbalance 0.0052849090355574805
wandb:           eval/valid-peaks/Nanog/profile/binsize=10/auprc 0.6914068459087236
wandb:    eval/valid-peaks/Nanog/profile/binsize=10/random_auprc 0.0395093651923

wandb:         eval/train-peaks/Klf4/profile/binsize=1/imbalance 0.0025936991873233535
wandb:            eval/train-peaks/Klf4/profile/binsize=10/auprc 0.5069436885644435
wandb:     eval/train-peaks/Klf4/profile/binsize=10/random_auprc 0.0271567784944443
wandb:      eval/train-peaks/Klf4/profile/binsize=10/n_positives 148841
wandb:    eval/train-peaks/Klf4/profile/binsize=10/frac_ambigous 0.3178633022155521
wandb:        eval/train-peaks/Klf4/profile/binsize=10/imbalance 0.02775140546015116
wandb:                          eval/train-peaks/Klf4/counts/mse 0.4605434536933899
wandb:                eval/train-peaks/Klf4/counts/var_explained 0.39447033405303955
wandb:                     eval/train-peaks/Klf4/counts/pearsonr 0.6296619882534754
wandb:                    eval/train-peaks/Klf4/counts/spearmanr 0.62634706873172
wandb:                          eval/train-peaks/Klf4/counts/mad 0.5586501955986023
wandb:              eval/train-peaks/avg/profile/binsize=1/auprc 0.3092829838709117
w

INFO [09-02 18:31:35] Using the following premade configuration: bpnet9
INFO [09-02 18:31:35] Using the following config.gin files: ChIP-nexus-default.gin
TF-MoDISco is using the TensorFlow backend.
Used config: ----------------------------------------
import bpnet
import bpnet.configurables
import bpnet.datasets
import bpnet.heads
import bpnet.layers
import bpnet.losses
import bpnet.metrics
import bpnet.models
import bpnet.seqmodel
import bpnet.trainers

# Macros:
augment_interval = True
batchnorm = False
dataspec = 'ChIP-nexus.dataspec.yml'
exclude_chr = ['chrX', 'chrY']
filters = 64
lambda = 10
lr = 0.004
n_bias_tracks = 2
n_dil_layers = 9
seq_width = 1000
tasks = ['Oct4', 'Sox2', 'Nanog', 'Klf4']
tconv_kernel_size = 25
test_chr = ['chr1', 'chr8', 'chr9']
tracks_per_task = 2
use_bias = True
valid_chr = ['chr2', 'chr3', 'chr4']

# Parameters for Adam:
Adam.amsgrad = False
Adam.beta_1 = 0.9
Adam.beta_2 = 0.999
Adam.decay = 0.0
Adam.epsilon = None
Adam.lr = %lr

# Parameters for bpnet_

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10


Epoch 10/10
INFO [09-02 19:04:38] Evaluating dataset: valid-peaks
229it [00:36,  6.26it/s]                                                        
  fracs = yt / yt.sum(axis=1, keepdims=True)
  is_peak = (fracs >= pos_min_threshold).astype(float)
  ambigous = (fracs < pos_min_threshold) & (fracs >= neg_max_threshold)
  ambigous = (fracs < pos_min_threshold) & (fracs >= neg_max_threshold)
INFO [09-02 19:06:43] Evaluating dataset: train-peaks
711it [01:50,  6.43it/s]                                                        
INFO [09-02 19:13:32] Saved metrics to /home/stephenmalina/project/dat/res-bpnet-training-2020-09-02-16-18-50/2020-09-02_18-31-28_02346b76-e4f9-4e30-be1a-7466539ca1a6/evaluation.valid.json
INFO [09-02 19:13:32] Done!
----------------------------------------
Final metrics: 
{
  "valid-peaks": {
    "Oct4/profile/binsize=1/auprc": 0.1738743029387763,
    "Oct4/profile/binsize=1/random_auprc": 0.002960082804739347,
    "Oct4/profile/binsize=1/n_positives": 49613,
    "Oct4

INFO [09-02 19:13:32] Running the evaluation report
Executing:   3%|█                              | 1/29 [00:01<00:35,  1.27s/cell]2020-09-02 19:13:35.771986: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
Executing:  10%|███▏                           | 3/29 [00:03<00:33,  1.30s/cell]2020-09-02 19:13:37.318680: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2200000000 Hz
2020-09-02 19:13:37.319751: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x5625f3cce8b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-09-02 19:13:37.319813: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2020-09-02 19:13:37.323219: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-09-02 19:13:37.392468: I tensorflow/stream_executor/cuda/cu

/binsize=1/imbalance': 0.0029965934590750337, 'Oct4/profile/binsize=10/auprc': 0.5119341219347598, 'Oct4/profile/binsize=10/random_auprc': 0.034526831959910034, 'Oct4/profile/binsize=10/n_positives': 121637, 'Oct4/profile/binsize=10/frac_ambigous': 0.36285924226674793, 'Oct4/profile/binsize=10/imbalance': 0.03421461678627826, 'Oct4/counts/mse': 0.32700565, 'Oct4/counts/var_explained': 0.33942103385925293, 'Oct4/counts/pearsonr': 0.5902471820913431, 'Oct4/counts/spearmanr': 0.5450652951543913, 'Oct4/counts/mad': 0.46392977, 'Sox2/profile/binsize=1/auprc': 0.4407745025210917, 'Sox2/profile/binsize=1/random_auprc': 0.006261757578954559, 'Sox2/profile/binsize=1/n_positives': 55541, 'Sox2/profile/binsize=1/frac_ambigous': 0.07064192513368985, 'Sox2/profile/binsize=1/imbalance': 0.006391738625177197, 'Sox2/profile/binsize=10/auprc': 0.7929912771109496, 'Sox2/profile/binsize=10/random_auprc': 0.05424659378446406, 'Sox2/profile/binsize=10/n_positives': 35763, 'Sox2/profile/binsize=10/frac_ambi

wandb:                     eval/valid-peaks/Oct4/counts/pearsonr 0.5738964173215796
wandb:                    eval/valid-peaks/Oct4/counts/spearmanr 0.5428652617542071
wandb:                          eval/valid-peaks/Oct4/counts/mad 0.4723202884197235
wandb:             eval/valid-peaks/Sox2/profile/binsize=1/auprc 0.3749831679578486
wandb:      eval/valid-peaks/Sox2/profile/binsize=1/random_auprc 0.005827952328284792
wandb:       eval/valid-peaks/Sox2/profile/binsize=1/n_positives 17463
wandb:     eval/valid-peaks/Sox2/profile/binsize=1/frac_ambigous 0.07191687344913152
wandb:         eval/valid-peaks/Sox2/profile/binsize=1/imbalance 0.005836291082636507
wandb:            eval/valid-peaks/Sox2/profile/binsize=10/auprc 0.7290268144784176
wandb:     eval/valid-peaks/Sox2/profile/binsize=10/random_auprc 0.053188090284870544
wandb:      eval/valid-peaks/Sox2/profile/binsize=10/n_positives 11664
wandb:    eval/valid-peaks/Sox2/profile/binsize=10/frac_ambigous 0.32768920595533496
wandb:    

wandb:    eval/train-peaks/Nanog/profile/binsize=10/random_auprc 0.03944601328668088
wandb:     eval/train-peaks/Nanog/profile/binsize=10/n_positives 247138
wandb:   eval/train-peaks/Nanog/profile/binsize=10/frac_ambigous 0.24034552242496168
wandb:       eval/train-peaks/Nanog/profile/binsize=10/imbalance 0.04021775763275393
wandb:                         eval/train-peaks/Nanog/counts/mse 0.5593143701553345
wandb:               eval/train-peaks/Nanog/counts/var_explained 0.3646731972694397
wandb:                    eval/train-peaks/Nanog/counts/pearsonr 0.6120724281818577
wandb:                   eval/train-peaks/Nanog/counts/spearmanr 0.6234456891139375
wandb:                         eval/train-peaks/Nanog/counts/mad 0.584722101688385
wandb:             eval/train-peaks/Klf4/profile/binsize=1/auprc 0.15732587802466144
wandb:      eval/train-peaks/Klf4/profile/binsize=1/random_auprc 0.0025730266912160024
wandb:       eval/train-peaks/Klf4/profile/binsize=1/n_positives 190888
wandb:    

2020-09-02 19:15:39.305512: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1180] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-09-02 19:15:39.305580: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1186]      0 
2020-09-02 19:15:39.305591: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1199] 0:   N 
2020-09-02 19:15:39.306143: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-09-02 19:15:39.306711: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:983] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-09-02 19:15:39.307184: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1325] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 5148 MB memory) -> physical GPU (device: 0, name: Tesla K80, pc

Epoch 1/10
2020-09-02 19:15:51.271079: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2020-09-02 19:15:51.496495: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10


Epoch 8/10
Epoch 9/10
Epoch 10/10


INFO [09-02 19:48:41] Evaluating dataset: valid-peaks
229it [00:37,  6.10it/s]                                                        
  fracs = yt / yt.sum(axis=1, keepdims=True)
  is_peak = (fracs >= pos_min_threshold).astype(float)
  ambigous = (fracs < pos_min_threshold) & (fracs >= neg_max_threshold)
  ambigous = (fracs < pos_min_threshold) & (fracs >= neg_max_threshold)
INFO [09-02 19:50:46] Evaluating dataset: train-peaks
711it [01:54,  6.22it/s]                                                        
INFO [09-02 19:57:44] Saved metrics to /home/stephenmalina/project/dat/res-bpnet-training-2020-09-02-16-18-50/2020-09-02_19-15-32_1a82503f-e4b5-4164-8b6f-04a80d61d5cd/evaluation.valid.json
INFO [09-02 19:57:44] Done!
----------------------------------------
Final metrics: 
{
  "valid-peaks": {
    "Oct4/profile/binsize=1/auprc": 0.17287880170349765,
    "Oct4/profile/binsize=1/random_auprc": 0.0028972852506997182,
    "Oct4/profile/binsize=1/n_positives": 49554,
    "Oct4/profile/b

INFO [09-02 19:57:44] Running the evaluation report
Executing:   3%|█                              | 1/29 [00:01<00:35,  1.25s/cell]2020-09-02 19:57:47.185108: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
Executing:  10%|███▏                           | 3/29 [00:03<00:33,  1.28s/cell]2020-09-02 19:57:48.725111: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2200000000 Hz
2020-09-02 19:57:48.726062: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55fe5e101e10 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-09-02 19:57:48.726110: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2020-09-02 19:57:48.730504: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-09-02 19:57:48.800165: I tensorflow/stream_executor/cuda/cu

wandb: Waiting for W&B process to finish, PID 6800
ox2/profile/binsize=10/imbalance': 0.05608734904701838, 'Sox2/counts/mse': 0.29732144, 'Sox2/counts/var_explained': 0.3124459981918335, 'Sox2/counts/pearsonr': 0.5707770475987016, 'Sox2/counts/spearmanr': 0.5496020381015704, 'Sox2/counts/mad': 0.39270914, 'Nanog/profile/binsize=1/auprc': 0.4520624791149324, 'Nanog/profile/binsize=1/random_auprc': 0.005521350952017978, 'Nanog/profile/binsize=1/n_positives': 411913, 'Nanog/profile/binsize=1/frac_ambigous': 0.05827248259173293, 'Nanog/profile/binsize=1/imbalance': 0.005400284822930656, 'Nanog/profile/binsize=10/auprc': 0.7297290655162583, 'Nanog/profile/binsize=10/random_auprc': 0.0413535346029784, 'Nanog/profile/binsize=10/n_positives': 248015, 'Nanog/profile/binsize=10/frac_ambigous': 0.24117623092498394, 'Nanog/profile/binsize=10/imbalance': 0.040352779105162905, 'Nanog/counts/mse': 0.6081712, 'Nanog/counts/var_explained': 0.33881711959838867, 'Nanog/counts/pearsonr': 0.609545833952243

wandb:                          eval/valid-peaks/Sox2/counts/mse 0.2900170683860779
wandb:                eval/valid-peaks/Sox2/counts/var_explained 0.326099157333374
wandb:                     eval/valid-peaks/Sox2/counts/pearsonr 0.5849559355397783
wandb:                    eval/valid-peaks/Sox2/counts/spearmanr 0.552382947580848
wandb:                          eval/valid-peaks/Sox2/counts/mad 0.3888988196849823
wandb:            eval/valid-peaks/Nanog/profile/binsize=1/auprc 0.42267641053344684
wandb:     eval/valid-peaks/Nanog/profile/binsize=1/random_auprc 0.005264769703912481
wandb:      eval/valid-peaks/Nanog/profile/binsize=1/n_positives 131131
wandb:    eval/valid-peaks/Nanog/profile/binsize=1/frac_ambigous 0.05827785356980446
wandb:        eval/valid-peaks/Nanog/profile/binsize=1/imbalance 0.005276866583254742
wandb:           eval/valid-peaks/Nanog/profile/binsize=10/auprc 0.7049438589953516
wandb:    eval/valid-peaks/Nanog/profile/binsize=10/random_auprc 0.0395907644512281


wandb:         eval/train-peaks/Klf4/profile/binsize=1/imbalance 0.0026058547813472355
wandb:            eval/train-peaks/Klf4/profile/binsize=10/auprc 0.5114899381713155
wandb:     eval/train-peaks/Klf4/profile/binsize=10/random_auprc 0.02834283894173522
wandb:      eval/train-peaks/Klf4/profile/binsize=10/n_positives 149686
wandb:    eval/train-peaks/Klf4/profile/binsize=10/frac_ambigous 0.31823936319015284
wandb:        eval/train-peaks/Klf4/profile/binsize=10/imbalance 0.02791866905952847
wandb:                          eval/train-peaks/Klf4/counts/mse 0.4974331855773926
wandb:                eval/train-peaks/Klf4/counts/var_explained 0.38057005405426025
wandb:                     eval/train-peaks/Klf4/counts/pearsonr 0.6181013182560472
wandb:                    eval/train-peaks/Klf4/counts/spearmanr 0.6207310209427741
wandb:                          eval/train-peaks/Klf4/counts/mad 0.5355029106140137
wandb:              eval/train-peaks/avg/profile/binsize=1/auprc 0.30687015480873

In [93]:
! echo {output_dir}
! echo
! ls {output_dir}/output_ensemble/0

/home/stephenmalina/project/dat/res-bpnet-training-2020-09-02-16-18-50

bpnet-train.kwargs.json			    history.csv
config.gin				    input-config.gin
config.gin.json				    log
dataspec.yml				    model.h5
evaluate.html				    note_params.json
evaluate.ipynb				    seq_model.pkl
evaluation.valid.json			    wandb.json
events.out.tfevents.1599063546.gregor-3-vm


In [94]:
!ls -latr {example_model_dir}/

total 14976
-rw-r--r--  1 stephenmalina stephenmalina       2 Sep  2 16:18 note_params.json
drwxr-xr-x  2 stephenmalina stephenmalina    4096 Sep  2 16:18 log
-rw-r--r--  1 stephenmalina stephenmalina     328 Sep  2 16:18 wandb.json
-rw-r--r--  1 stephenmalina stephenmalina     302 Sep  2 16:18 input-config.gin
-rw-r--r--  1 stephenmalina stephenmalina     579 Sep  2 16:18 bpnet-train.kwargs.json
-rw-r--r--  1 stephenmalina stephenmalina    1562 Sep  2 16:18 dataspec.yml
-rw-r--r--  1 stephenmalina stephenmalina    5943 Sep  2 16:18 config.gin
-rw-r--r--  1 stephenmalina stephenmalina    4191 Sep  2 16:18 config.gin.json
-rw-r--r--  1 stephenmalina stephenmalina    3743 Sep  2 16:51 history.csv
-rw-r--r--  1 stephenmalina stephenmalina 1733204 Sep  2 16:51 events.out.tfevents.1599063546.gregor-3-vm
-rw-r--r--  1 stephenmalina stephenmalina 1810128 Sep  2 16:52 model.h5
-rw-r--r--  1 stephenmalina stephenmalina 2401402 Sep  2 16:52 seq_model.pkl
-rw-r--r--  1 stephenmalina 