In [None]:
import numpy
from src.models.conv_model import build_model as build_conv_model
from src.data_loader.RNASeqStructLoader import RNASeqStructDataGenerator 
from src.models.conv_model import correlation_coefficient_loss, pearson_r

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Conv1D, Dropout, Flatten, BatchNormalization, MaxPool1D, Activation
from tensorflow.keras.optimizers import RMSprop
import tensorflow as tf

def custom_model(dropout):
    """Build and compile a 1-D convolutional regression network.

    The network takes inputs of shape (101, 3) and stacks two stages of three
    Conv1D -> BatchNormalization -> ReLU -> Dropout units, each stage followed
    by a max-pool of size 2. The result is flattened, passed through a
    16-unit ReLU dense layer and a single sigmoid output unit.

    Args:
        dropout: Dropout rate applied after every convolutional unit.

    Returns:
        A compiled ``Sequential`` model (MSE loss, RMSprop optimizer) that
        reports MAE, MSE, ``correlation_coefficient_loss`` and ``pearson_r``.
    """
    # (filters, kernel_size) for each convolutional unit, grouped by stage;
    # every stage is closed by a MaxPool1D layer.
    stages = (
        ((128, 32), (128, 32), (64, 32)),
        ((32, 16), (32, 16), (32, 16)),
    )

    def conv_unit(n_filters, width):
        # Layers are instantiated in the order they are added to the model.
        return [
            Conv1D(filters=n_filters, kernel_size=width, padding='same'),
            BatchNormalization(),
            Activation("relu"),
            Dropout(dropout),
        ]

    model = Sequential()
    model.add(Input(shape=(101, 3)))
    for stage in stages:
        for n_filters, width in stage:
            for layer in conv_unit(n_filters, width):
                model.add(layer)
        model.add(MaxPool1D(pool_size=2, padding="same"))

    # Regression head: the sigmoid keeps predictions inside [0, 1].
    model.add(Flatten())
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # NOTE(review): `decay` is a legacy optimizer argument; newer Keras
    # releases may reject it - confirm against the installed version.
    optimizer = tf.keras.optimizers.RMSprop(decay=0.0005)
    tracked_metrics = ['mae', 'mse', correlation_coefficient_loss, pearson_r]
    model.compile(loss='mse', optimizer=optimizer, metrics=tracked_metrics)
    return model

In [None]:
#model = build_conv_model(101, 4, 64, 15, [16], blocks=2, dil_rate=1, pooling_size=1, dropout=0.1)
#model = custom_model(0.3)
import numpy
from src.models.conv_model import build_model as build_conv_model
import tensorflow as tf
from src.evaluator.evaluator import Evaluator
from src.models.conv_model import correlation_coefficient_loss, pearson_r
import math
# Custom functions, keyed by name, handed to load_model via `custom_objects`
# so that the saved model's metric references can be resolved.
dependencies = dict(
    correlation_coefficient_loss=correlation_coefficient_loss,
    pearson_r=pearson_r,
)
def cosJump(epoch, *, max_lrate=-3, min_lrate=-5, t_s=10):
    """Cosine-annealed learning rate with periodic restarts, in log10 space.

    Within each cycle of ``t_s`` epochs the base-10 exponent of the learning
    rate follows half a cosine wave from ``max_lrate`` (first epoch of the
    cycle) down towards ``min_lrate``; at the next multiple of ``t_s`` it
    jumps back up to ``max_lrate``.

    The tuning knobs are keyword-only on purpose: Keras'
    ``LearningRateScheduler`` first tries ``schedule(epoch, lr)`` and only
    falls back to ``schedule(epoch)`` on ``TypeError``, so an extra positional
    parameter would silently receive the current learning rate.

    Args:
        epoch: Zero-based epoch index.
        max_lrate: log10 of the learning rate at the start of a cycle.
        min_lrate: log10 of the learning rate the cycle decays towards.
        t_s: Cycle length in epochs; must be positive.

    Returns:
        The learning rate for ``epoch`` as a float.

    Raises:
        ValueError: If ``t_s`` is not positive.
    """
    if t_s <= 0:
        raise ValueError("t_s must be a positive number of epochs")
    # Position inside the current cycle, in [0, 1). The period comes from t_s
    # so the restart interval and the cosine period cannot drift apart.
    phase = (epoch % t_s) / t_s
    exponent = min_lrate + 0.5 * (max_lrate - min_lrate) * (1 + math.cos(phase * math.pi))
    return math.pow(10, exponent)
# Per-epoch learning-rate callback driven by cosJump. Once passed to
# model.fit, LearningRateScheduler sets the optimizer's learning rate from the
# schedule at the start of every epoch, so the value given to RMSprop below
# only acts as the initial setting.
lrate = tf.keras.callbacks.LearningRateScheduler(cosJump, verbose=1)
# `learning_rate` is the supported keyword; the legacy alias `lr` is
# deprecated in TF 2.x and rejected by newer Keras releases.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=3e-5)

# Load a previously saved model; the custom metric functions must be supplied
# through `custom_objects` for deserialisation.
model = tf.keras.models.load_model("models/cDNA-ABE/model-30-0.5643.h5", custom_objects=dependencies)

# Re-compile so the optimizer created above replaces the one stored in the file.
model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse', correlation_coefficient_loss, pearson_r])
# summary() prints the table itself and returns None; wrapping it in print()
# only added a stray "None" line to the output.
model.summary()

In [None]:
# Data sources for fitting and for per-epoch validation.
# The second positional argument (1024) is presumably the batch size -
# TODO confirm against RNASeqStructDataGenerator.
train_generator = RNASeqStructDataGenerator(
    "data/processed/cDNA-ABE/train_data.hdf5",
    1024,
)
validation_generator = RNASeqStructDataGenerator(
    "data/processed/cDNA-ABE/validation_data.hdf5",
    1024,
)

In [None]:
import os
from time import time

import tensorflow as tf

# File-name template filled in by ModelCheckpoint at every save: the epoch
# number and the validation Pearson r reported for that epoch.
checkpoint_filepath = "models/cDNA-ABE/12.14.20/model-{epoch:02d}-{val_pearson_r:.4f}.h5"

# Create the target directory up front so the first save cannot fail on a
# missing folder (not every Keras version creates it automatically).
os.makedirs(os.path.dirname(checkpoint_filepath), exist_ok=True)

# Save the full model (architecture + weights) after every epoch, not only
# the best one.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=False,
    #monitor='val_pearson_r',
    #mode='max',
    save_best_only=False)
# Train for 300 epochs; `lrate` applies the cosJump schedule each epoch.
history = model.fit(x=train_generator, epochs=300, validation_data=validation_generator, callbacks=[model_checkpoint_callback, lrate], use_multiprocessing=True, workers=40)

In [None]:
import numpy
from src.models.conv_model import build_model as build_conv_model
from src.data_loader.RNASeqStructLoader import RNASeqStructDataGenerator 

import tensorflow as tf
from src.evaluator.evaluator import Evaluator
from src.models.conv_model import correlation_coefficient_loss, pearson_r
# Held-out test data. The second positional argument (3102) is presumably the
# batch size - TODO confirm against RNASeqStructDataGenerator.
test_generator = RNASeqStructDataGenerator(
    "data/processed/cDNA-ABE/test_data.hdf5",
    3102,
)
# Custom functions needed to deserialise the saved model.
dependencies = dict(
    correlation_coefficient_loss=correlation_coefficient_loss,
    pearson_r=pearson_r,
)

model = tf.keras.models.load_model(
    "models/cDNA-ABE/model-30-0.5643.h5",
    custom_objects=dependencies,
)

# Run the project's Evaluator on the loaded model and the test data.
ev = Evaluator(model, test_generator)
ev.evaluate()

#ev.generatePlot("cDNA-ABE sequence+structure")

In [None]:
import seaborn as sns

In [None]:
import numpy
from src.models.conv_model import build_model as build_conv_model
from src.data_loader.RNASeqStructLoader import RNASeqStructDataGenerator 

import tensorflow as tf
from src.evaluator.evaluator import Evaluator
from src.models.conv_model import correlation_coefficient_loss, pearson_r

# Test data with the generator's `structure_only` flag switched on.
# The second positional argument (3102) is presumably the batch size -
# TODO confirm against RNASeqStructDataGenerator.
test_generator = RNASeqStructDataGenerator(
    "data/processed/cDNA-ABE/test_data.hdf5",
    3102,
    structure_only=True,
)
# Custom functions needed to deserialise the saved model.
dependencies = dict(
    correlation_coefficient_loss=correlation_coefficient_loss,
    pearson_r=pearson_r,
)

model = tf.keras.models.load_model(
    "models/cDNA-ABE/12.14.20/structure-model-04-0.1711.h5",
    custom_objects=dependencies,
)

In [None]:
# Model predictions for every batch produced by the test generator.
tgts = model.predict(
    test_generator,
    use_multiprocessing=True,
    workers=20,
)

In [None]:
# Keep element 1 of every batch - presumably the targets of an
# (inputs, targets) pair; verify against RNASeqStructDataGenerator.
gt = [batch[1] for batch in test_generator]

In [None]:
import numpy as np
# Join the per-batch arrays end to end along the first axis.
gft = np.concatenate(gt, axis=0)

In [None]:
# Turn the ground truth into a single-column 2-D array (one row per sample).
gft = gft.reshape((gft.shape[0], 1))

In [None]:
# Sanity check on the ground-truth array's dimensions.
print(gft.shape)

In [None]:
# Spot check: show the observed value and the prediction for one row.
x = 2907
print(gft[x], tgts[x], sep="\n")

In [None]:
# Place ground truth and predictions side by side, then report the shape.
res = np.hstack([gft, tgts])
print(res.shape)

In [None]:
import matplotlib as mpl
import pandas as pd

# Wrap the stacked observed/predicted array in a DataFrame for plotting.
df = pd.DataFrame(res)
# Restore matplotlib's built-in default rcParams before plotting.
mpl.rcParams.update(mpl.rcParamsDefault)

In [None]:
import scipy.stats as stats
import matplotlib.cm as cm
import matplotlib.pyplot as plt
# Name the two columns so they can be referenced by label in the plot.
df.columns = ['observed editing', 'predicted editing']
# Hex-binned joint distribution with log-scaled bin counts.
p = sns.jointplot(data=df, x="observed editing", y="predicted editing", kind="hex", bins='log',  cmap='BuPu')

# JointGrid.annotate() was removed in seaborn 0.11, so compute the Pearson
# correlation here and draw it on the joint axes directly.
r_value, p_value = stats.pearsonr(df["observed editing"], df["predicted editing"])
p.ax_joint.annotate(
    "pearsonr = {:.2g}; p = {:.2g}".format(r_value, p_value),
    xy=(0.05, 0.95),
    xycoords="axes fraction",
    ha="left",
    va="top",
)
p.fig.suptitle("cDNA ABE Structure Only")
# Set the limits on the joint axes explicitly: plt.xlim/plt.ylim act on
# whichever axes happens to be current, which may be a marginal axes.
p.ax_joint.set_xlim(0, 1)
p.ax_joint.set_ylim(0, 1)
plt.show()