# RBPNet-like model trained with bpnet-lite
Reference paper: https://doi.org/10.1186/s13059-023-03015-7

Changes:
- "n_filters": 128
- "n_layers": 9
- "batch_size": 128
- "in_window": 300
- "out_window": 300
- "max_jitter": 0

In [None]:
# Optional one-time setup: uncomment the line below to install bpnet-lite into
# the kernel's environment. The later cells invoke a `bpnet` command-line tool,
# presumably provided by this package — confirm before relying on it.
# %pip install bpnet-lite

In [None]:
# Run the `bpnet fit` command with the parameter file rbpnet_fit_example.json.
# NOTE(review): the next cell reads "example.log" — presumably the training log
# written by this run; confirm the file name against the JSON parameters.
!bpnet fit -p rbpnet_fit_example.json

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Parse the tab-separated log file into a DataFrame; the plotting cell reads
# its "Epoch", "Training ..." and "Validation ..." columns.
log = pd.read_csv("example.log", sep="\t")
log

In [None]:
# Per-epoch curves read from the `log` DataFrame. Each figure overlays two
# columns, so the y-axis gets a neutral label instead of the name of whichever
# column happened to be drawn first.

# Profile loss (MNLL): training vs. validation.
sns.lineplot(data=log, x="Epoch", y="Training MNLL", label="train")
sns.lineplot(data=log, x="Epoch", y="Validation MNLL", label="val")
plt.ylabel("MNLL")
plt.title("MNLL Loss")
plt.show()

# Count loss (MSE): training vs. validation.
sns.lineplot(data=log, x="Epoch", y="Training Count MSE", label="train")
sns.lineplot(data=log, x="Epoch", y="Validation Count MSE", label="val")
plt.ylabel("Count MSE")
plt.title("Count MSE Loss")
plt.show()

# Validation Pearson of the profile and count outputs. The "count" curve reads
# its own column; previously the profile column was plotted twice, so the two
# curves were identical and the count correlation was never shown.
sns.lineplot(data=log, x="Epoch", y="Validation Profile Pearson", color="green", label="profile")
sns.lineplot(data=log, x="Epoch", y="Validation Count Pearson", color="brown", label="count")
plt.ylabel("Pearson")
plt.title("Validation Pearson")
plt.show()

In [None]:
# Run the `bpnet predict` command with the parameter file rbpnet_predict_example.json.
# NOTE(review): the next cell loads "y_profile.npz" and "y_counts.npz" —
# presumably written by this command; confirm against the JSON parameters.
!bpnet predict -p rbpnet_predict_example.json

In [None]:
import numpy as np
import torch

def load_npz_array(path, key="arr_0"):
    """Load a single array from an ``.npz`` archive and close the archive.

    ``np.load`` on an ``.npz`` file returns a lazy ``NpzFile`` that keeps the
    underlying file open until it is closed; the ``with`` block releases the
    handle once the requested array has been read into memory.

    Parameters
    ----------
    path : str
        Path of the ``.npz`` archive.
    key : str, default "arr_0"
        Name of the array to extract from the archive.

    Returns
    -------
    numpy.ndarray
        The array stored under ``key``.
    """
    with np.load(path) as archive:
        # Show which arrays the archive contains (same output as before).
        print(list(archive.keys()))
        return archive[key]


y_profile = load_npz_array("y_profile.npz")
y_counts = load_npz_array("y_counts.npz")

# y_profile is indexed later as [example, strand, position] and y_counts as
# [example, 0].
print(y_profile.shape, y_counts.shape)

In [None]:
# get ground truth
from tangermeme.io import extract_loci
import json

# Read the two JSON parameter files that are also passed to the `bpnet`
# commands earlier in the notebook.
with open("rbpnet_fit_example.json", "r") as fit_file:
    parameters_fit = json.load(fit_file)

with open("rbpnet_predict_example.json", "r") as predict_file:
    parameters_predict = json.load(predict_file)

# Blank out the configured controls so that `in_signals` below is None.
parameters_fit.update(controls=None)

# Arguments for extract_loci: data sources and chromosomes come from the fit
# parameters, window sizes from the predict parameters, and jitter is disabled.
loci_kwargs = {
    "sequences": parameters_fit["sequences"],
    "signals": parameters_fit["signals"],
    "in_signals": parameters_fit["controls"],
    "loci": parameters_fit["loci"],
    "chroms": parameters_fit["validation_chroms"],
    "in_window": parameters_predict["in_window"],
    "out_window": parameters_predict["out_window"],
    "max_jitter": 0,
    # every uppercase letter except A, C, G and T
    "ignore": list('QWERYUIOPSDFHJKLZXVBNM'),
    "verbose": parameters_fit["verbose"],
}
valid_data = extract_loci(**loci_kwargs)

# sequences and signals
print(*(part.shape for part in valid_data[:2]))

In [None]:
# First-axis index of the single example that is plotted from both
# `y_profile` (predictions) and `valid_data[1]` (ground truth).
idx = 150

def softmax(x, axis=0):
    """Compute softmax values for each set of scores in ``x`` along ``axis``.

    Parameters
    ----------
    x : array-like
        Scores (logits).
    axis : int, default 0
        Axis along which the scores are normalised. The default keeps the
        previous behaviour (normalisation along the first axis).

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` whose entries sum to 1 along
        ``axis``.
    """
    x = np.asarray(x)
    # Shift by the maximum along the normalised axis rather than the global
    # maximum: a slice lying far below the global maximum would otherwise
    # underflow to all zeros and produce 0/0 = NaN.
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / e_x.sum(axis=axis, keepdims=True)

# (label, colour) for strand index 0 and 1; the second strand is drawn
# mirrored below the x-axis in both figures.
strand_styles = (("plus", "blue"), ("minus", "orange"))

# Predicted profile of example `idx`: softmax is applied to each strand on its own.
# NOTE(review): if the model normalises profiles jointly across strands, the
# relative heights of the two strands shown here will differ — confirm.
for strand, (strand_label, strand_color) in enumerate(strand_styles):
    probs = softmax(y_profile[idx, strand, :])
    positions = np.arange(len(probs))
    sns.lineplot(
        x=positions,
        y=probs if strand == 0 else -probs,
        label=strand_label,
        color=strand_color,
    )
plt.title("Predictions")
plt.show()

# Observed signal of the same example, taken unchanged from valid_data[1].
for strand, (strand_label, strand_color) in enumerate(strand_styles):
    signal = valid_data[1][idx, strand, :]
    positions = np.arange(len(signal))
    sns.lineplot(
        x=positions,
        y=signal if strand == 0 else -signal,
        label=strand_label,
        color=strand_color,
    )
plt.title("Ground Truth")
plt.show()

In [None]:
# Compare predicted and observed total counts on the same scale.
# bpnet-lite fits its count output against log(total counts + 1), so the
# observed counts are transformed with log1p before plotting; the previous
# version plotted log-space predictions against raw counts.
x = y_counts[:, 0]

# Observed total per example: sum the signal over the two trailing axes.
observed_total = np.asarray(valid_data[1].sum(axis=1).sum(axis=1))
y = np.log1p(observed_total)

sns.scatterplot(x=x, y=y, alpha=0.1)
plt.ylabel("Ground Truth (log1p total counts)")
plt.xlabel("Predicted (log counts)")
plt.title("Total Counts")
plt.show()