In [1]:
import time
# import keras

# from keras.models import Model
# from keras.layers import Dropout, Flatten, BatchNormalization, TimeDistributed, Input, Add, Concatenate
# from keras.layers import Dense, Conv2D, MaxPooling2D, LSTM, TimeDistributed, Reshape
# import keras.backend as K
# import keras.callbacks as callbacks

# import pandas as pd
# import numpy as np
# from numpy import array
# from sklearn.linear_model import LinearRegression
# from sklearn.model_selection import KFold

In [2]:
import keras

from keras.models import Model
from keras.layers import Dropout, Flatten, BatchNormalization, TimeDistributed, Input, Add, Concatenate
from keras.layers import Dense, Conv2D, MaxPooling2D, LSTM, TimeDistributed, Reshape
import keras.backend as K
import keras.callbacks as callbacks

import pandas as pd
import numpy as np
from numpy import array
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold

In [3]:
save_path = "/Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/"
model_name = "ir_lstm_separateC0s"
kf = KFold(n_splits = 10, shuffle =True)
num_epochs = 60

#### define functions ####

def model_cycle():
    inputs = Input(shape=(50, 4, 1))
        
    x = Conv2D(48, kernel_size=(3,4),
                   activation='relu',
                   padding='valid')(inputs)
    x = MaxPooling2D((2,1),padding='same')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)

    # parallel line 1
    fx1 = Conv2D(48, kernel_size=(3,1),
                   activation='relu',
                   padding='same')(x)
    fx1 = BatchNormalization()(fx1)
    fx1 = Dropout(0.2)(fx1)
    fx1 = Conv2D(48, kernel_size=(3,1),
                   activation='relu',
                   padding='same')(fx1)
    fx1 = MaxPooling2D((2,1),padding='same')(fx1)
    fx1 = BatchNormalization()(fx1)
    fx1 = Dropout(0.2)(fx1)

    # parallel line 2
    fx2 = Conv2D(48, kernel_size=(11,1),
                   activation='relu',
                   padding='same')(x)
    fx2 = BatchNormalization()(fx2)
    fx2 = Dropout(0.2)(fx2)
    fx2 = Conv2D(48, kernel_size=(21,1),
                   activation='relu',
                   padding='same')(fx2)
    fx2 = MaxPooling2D((2,1),padding='same')(fx2)
    fx2 = BatchNormalization()(fx2)
    fx2 = Dropout(0.2)(fx2)
    
    # Add
    x1 = Concatenate(axis=-3)([fx1, fx2])
    x = Add()([x, x1])
    x = MaxPooling2D((2,1),padding='same')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)
    
    x = Reshape((K.int_shape(x)[1], K.int_shape(x)[3]))(x)
    x = LSTM(20, return_sequences=False)(x)
    x = Dropout(0.2)(x)

    outputs = Dense(1, activation='linear')(x)

    network = Model(inputs, outputs)
    network.compile(optimizer='rmsprop',
                    loss='mean_squared_error')
    return network
    
def dnaOneHot(sequence):
    seq_array = array(list(sequence))
    code = {"A": [0], "C": [1], "G": [2], "T": [3], "N": [4],
            "a": [0], "c": [1], "g": [2], "t": [3], "n": [4]}
    onehot_encoded_seq = []
    for char in seq_array:
        onehot_encoded = np.zeros(5)
        onehot_encoded[code[char]] = 1
        onehot_encoded_seq.append(onehot_encoded[0:4])
    return onehot_encoded_seq



In [4]:
def display_fits(fits):
    print(f"Average correlation on tiling: {np.mean([fits[0][i] for i in range(fits[0].size) if (i % 8) == 5])}",
          f"\nAverage MSE on tiling: {np.mean([fits[1][i] for i in range(fits[1].size) if (i % 8) == 5])}",
          f"\nAverage correlation on random: {np.mean([fits[0][i] for i in range(fits[0].size) if (i % 8) == 3])}",
          f"\nAverage MSE on random: {np.mean([fits[1][i] for i in range(fits[1].size) if (i % 8) == 3])}",
          f"\nAverage correlation on ChrV: {np.mean([fits[0][i] for i in range(fits[0].size) if (i % 8) == 7])}",
          f"\nAverage MSE on ChrV: {np.mean([fits[1][i] for i in range(fits[1].size) if (i % 8) == 7])}",
          f"\nAverage correlation on CN: {np.mean([fits[0][i] for i in range(fits[0].size) if (i % 8) == 0])}",
          f"\nAverage MSE on CN: {np.mean([fits[1][i] for i in range(fits[1].size) if (i % 8) == 0])}")

In [5]:
def find_c0new(dat, aa):
  mat = np.empty((3,3), float)
  k = 2*np.pi/10.4
  n = array([26, 29, 31])
  mat[0:3,0] = 1
  mat[0:3, 1] = np.sin(n*k)
  mat[0:3, 2] = np.cos(n*k)
  mat[0,1:3] = mat[0,1:3]*aa[0]
  mat[1,1:3] = mat[1,1:3]*aa[1]
  mat[2,1:3] = mat[2,1:3]*aa[2]
  inv_mat = np.linalg.inv(mat)
  c0A1A2 = array(np.matmul(dat[["n=26", "n=29", "n=31"]], np.transpose(inv_mat)))
  c0Aphi = c0A1A2
  c0Aphi[:,0] = c0A1A2[:,0]
  c0Aphi[:,1] = np.sqrt(c0A1A2[:,1]**2 + c0A1A2[:,2]**2)
  c0Aphi[:,2] = np.sign(c0A1A2[:,2]) * np.arccos(c0A1A2[:,1]/c0Aphi[:,1])
  return c0Aphi[:,0]

In [6]:
class TimeHistory(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.times = []

    def on_epoch_begin(self, batch, logs={}):
        self.epoch_time_start = time.process_time()

    def on_epoch_end(self, batch, logs={}):
        self.times.append(time.process_time() - self.epoch_time_start)

In [7]:
data_cerevisiae_nucle = pd.read_csv("/Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/cycle1.txt",delimiter = ",")
X1 = []
for sequence_nt in data_cerevisiae_nucle["Sequence"]:
    X1.append(dnaOneHot(sequence_nt))
X1 = array(X1)
X1 = X1.reshape((X1.shape[0],50,4,1))
X1_reverse = np.flip(X1,[1,2])
# Y1 = data_cerevisiae_nucle["C0"].values.astype(float)
# v6:
Y1 = find_c0new(data_cerevisiae_nucle, aa=[1, 0.35, 1.11]).astype(float)

In [11]:
Y1_26 = data_cerevisiae_nucle["n=26"]
Y1_31 = data_cerevisiae_nucle["n=31"]

In [13]:
Y1 = np.concatenate([array(Y1_26), array(Y1_31)])

In [17]:
X1.shape

(19907, 50, 4, 1)

In [18]:
X1_2 = np.concatenate([X1, X1], axis=0)

In [19]:
X1_2.shape

(39814, 50, 4, 1)

In [8]:
data_random_library = pd.read_csv("/Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/cycle3.txt",delimiter = ",")
X3 = []
for sequence_nt in data_random_library["Sequence"]:
    X3.append(dnaOneHot(sequence_nt))
X3 = array(X3)
X3 = X3.reshape((X3.shape[0],50,4,1))
X3_reverse = np.flip(X3,[1,2])
Y3 = data_random_library["C0"].values.astype(float)

In [9]:
data_tiling = pd.read_csv("/Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/cycle5.txt",delimiter = ",")
X5 = []
for sequence_nt in data_tiling["Sequence"]:
    X5.append(dnaOneHot(sequence_nt))
X5 = array(X5)
X5 = X5.reshape((X5.shape[0],50,4,1))
X5_reverse = np.flip(X5,[1,2])
# Y5 = data_tiling["C0"].values.astype(float)
# v4:
Y5 = find_c0new(data_tiling, aa=[1, 0.39, 1.18]).astype(float)

In [10]:
data_chr5 = pd.read_csv("/Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/cycle6.txt",delimiter = ",")
X6 = []
for sequence_nt in data_chr5["Sequence"]:
    X6.append(dnaOneHot(sequence_nt))
X6 = array(X6)
X6 = X6.reshape((X6.shape[0],50,4,1))
X6_reverse = np.flip(X6,[1,2])
# Y6 = data_chr5["C0"].values.astype(float)
# v6:
Y6 = find_c0new(data_chr5, aa=[1, 0.35, 1.11]).astype(float)

In [11]:
m1 = np.mean(Y1)
std1 = np.std(Y1)
Z1 = (Y1-m1)/std1

m3 = np.mean(Y3)
std3 = np.std(Y3)
Z3 = (Y3-m3)/std3


m5 = np.mean(Y5)
std5 = np.std(Y5)
Z5 = (Y5-m5)/std5


m6 = np.mean(Y6)
std6 = np.std(Y6)
Z6 = (Y6-m6)/std6

In [12]:
Y1[:6], Y3[:6], Y5[:6], Y6[:6]

(array([ 0.02360541, -0.40108617,  1.30018368,  0.3337653 ,  0.23685198,
        -0.68780525]),
 array([-0.081574, -0.005321, -0.08891 , -0.044137, -0.225836,  0.158945]),
 array([-0.55739014, -0.55253899, -0.2752565 , -0.4208636 , -0.25469771,
         0.37953686]),
 array([ 0.06143367, -0.76210367, -0.45450441,  0.30210197,  0.06623511,
        -0.21504718]))

In [13]:
#### tiling

VALIDATION_LOSS = []
fold_var = 1
n = Y5.shape[0]

fits = []
detrend = []
times = []
times2 = []

for train_index, val_index in kf.split(Y5):
    training_X = X5[train_index]
    training_X_reverse = X5_reverse[train_index]
    validation_X = X5[val_index]
    validation_X_reverse = X5_reverse[val_index]
    training_Y = Y5[train_index]
    validation_Y = Y5[val_index]
    # CREATE NEW MODEL
    model = model_cycle()
    # CREATE CALLBACKS
    checkpoint = callbacks.ModelCheckpoint(save_path + model_name+"_tiling_"+str(fold_var)+".h5",
                                                    monitor='val_loss', verbose=1,
                                                    save_best_only=True, mode='min')
    time_callback = TimeHistory()

    history = model.fit(training_X, training_Y,
                        epochs=num_epochs,
                        callbacks= [checkpoint, time_callback],
                        validation_data=(validation_X, validation_Y))
    model.load_weights(save_path + model_name+"_tiling_"+str(fold_var)+".h5")
    model.save(save_path+model_name+"_tiling_"+str(fold_var),save_traces=False)
    times.append(time_callback.times)

    pred_Y = model.predict(training_X)
    pred_Y = pred_Y.reshape(pred_Y.shape[0])
    pred_Y_reverse = model.predict(training_X_reverse)
    pred_Y_reverse = pred_Y_reverse.reshape(pred_Y_reverse.shape[0])
    pred_Y = (pred_Y+pred_Y_reverse)/2
    reg =  LinearRegression().fit(array(pred_Y).reshape(-1, 1), array(training_Y).reshape(-1, 1))
    
    detrend_int = reg.intercept_
    detrend_slope = reg.coef_
    detrend.append([float(detrend_int), float(detrend_slope)])

    start_time = time.process_time()
    fit = model.predict(X1)
    fit = fit.reshape(fit.shape[0])
    fit_reverse = model.predict(X1_reverse)
    fit_reverse = fit_reverse.reshape(fit_reverse.shape[0])
    reverse_corr = np.corrcoef(fit, fit_reverse)[0,1]
    fit = (fit + fit_reverse)/2
    fit = fit.flatten()
    fit_tmp =[np.corrcoef(fit, Y1)[0,1],np.mean(np.square(fit-Y1)),np.mean(fit),np.std(fit),reverse_corr]
    fits.append(fit_tmp)
    fit = detrend_int + fit * detrend_slope
    fit = fit.flatten()
    fit_tmp =[np.corrcoef(fit, Y1)[0,1],np.mean(np.square(fit-Y1)),np.mean(fit),np.std(fit),reverse_corr]
    time0 = time.process_time() - start_time
    times2.append([time0])
    fits.append(fit_tmp)
    
    start_time = time.process_time()
    fit = model.predict(X3)
    fit = fit.reshape(fit.shape[0])
    fit_reverse = model.predict(X3_reverse)
    fit_reverse = fit_reverse.reshape(fit_reverse.shape[0])
    reverse_corr = np.corrcoef(fit, fit_reverse)[0,1]
    fit = (fit + fit_reverse)/2
    fit = fit.flatten()
    fit_tmp =[np.corrcoef(fit, Y3)[0,1],np.mean(np.square(fit-Y3)),np.mean(fit),np.std(fit),reverse_corr]
    fits.append(fit_tmp)
    fit = detrend_int + fit * detrend_slope
    fit = fit.flatten()
    fit_tmp =[np.corrcoef(fit, Y3)[0,1],np.mean(np.square(fit-Y3)),np.mean(fit),np.std(fit),reverse_corr]
    time0 = time.process_time() - start_time
    times2.append([time0])
    fits.append(fit_tmp)
    
    start_time = time.process_time()
    fit = model.predict(validation_X)
    fit = fit.reshape(fit.shape[0])
    fit_reverse = model.predict(validation_X_reverse)
    fit_reverse = fit_reverse.reshape(fit_reverse.shape[0])
    reverse_corr = np.corrcoef(fit, fit_reverse)[0,1]
    fit = (fit + fit_reverse)/2
    fit = fit.flatten()
    fit_tmp =[np.corrcoef(fit, validation_Y)[0,1],np.mean(np.square(fit-validation_Y)),np.mean(fit),np.std(fit),reverse_corr]
    fits.append(fit_tmp)
    fit = detrend_int + fit * detrend_slope
    fit = fit.flatten()
    fit_tmp =[np.corrcoef(fit, validation_Y)[0,1],np.mean(np.square(fit-validation_Y)),np.mean(fit),np.std(fit),reverse_corr]
    time0 = time.process_time() - start_time
    times2.append([time0])
    fits.append(fit_tmp)
    
    start_time = time.process_time()
    fit = model.predict(X6)
    fit = fit.reshape(fit.shape[0])
    fit_reverse = model.predict(X6_reverse)
    fit_reverse = fit_reverse.reshape(fit_reverse.shape[0])
    reverse_corr = np.corrcoef(fit, fit_reverse)[0,1]
    fit = (fit + fit_reverse)/2
    fit = fit.flatten()
    fit_tmp =[np.corrcoef(fit, Y6)[0,1],np.mean(np.square(fit-Y6)),np.mean(fit),np.std(fit),reverse_corr]
    fits.append(fit_tmp)
    fit = detrend_int + fit * detrend_slope
    fit = fit.flatten()
    fit_tmp =[np.corrcoef(fit, Y6)[0,1],np.mean(np.square(fit-Y6)),np.mean(fit),np.std(fit),reverse_corr]
    time0 = time.process_time() - start_time
    times2.append([time0])
    fits.append(fit_tmp)
    
    K.clear_session()
    fold_var += 1
    
detrend = array(detrend)
detrend = pd.DataFrame(detrend)
detrend.to_csv(save_path +model_name+"_detrend_tiling.txt", index = False)

fits = array(fits)
fits = pd.DataFrame((fits))
fits.to_csv(save_path +model_name+ "_fits_tiling.txt", index = False)

with open(save_path +model_name+"_time_tiling.txt", "w") as file:
    for row in times:
        s = " ".join(map(str, row))
        file.write(s+'\n')

with open(save_path +model_name+"_pred_time_tiling.txt", "w") as file:
    for row in times2:
        s = " ".join(map(str, row))
        file.write(s+'\n')

2023-12-18 16:13:42.412872: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/60
Epoch 1: val_loss improved from inf to 0.07410, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_1.h5
Epoch 2/60
Epoch 2: val_loss improved from 0.07410 to 0.06652, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_1.h5
Epoch 3/60
Epoch 3: val_loss improved from 0.06652 to 0.06174, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_1.h5
Epoch 4/60
Epoch 4: val_loss improved from 0.06174 to 0.05744, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_1.h5
Epoch 5/60
Epoch 5: val_loss improved from 0.05744 to 0.05719, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_1.h5
Epoch 6/60
Epoch 6: val_loss did not improve from 0.05719
E



INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_1/assets


INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_1/assets


Epoch 1/60
Epoch 1: val_loss improved from inf to 0.09278, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_2.h5
Epoch 2/60
Epoch 2: val_loss improved from 0.09278 to 0.08015, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_2.h5
Epoch 3/60
Epoch 3: val_loss improved from 0.08015 to 0.07073, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_2.h5
Epoch 4/60
Epoch 4: val_loss improved from 0.07073 to 0.06095, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_2.h5
Epoch 5/60
Epoch 5: val_loss improved from 0.06095 to 0.05963, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_2.h5
Epoch 6/60
Epoch 6: val_loss did not improve from 0.05963
E



INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_2/assets


INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_2/assets


Epoch 1/60
Epoch 1: val_loss improved from inf to 0.08320, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_3.h5
Epoch 2/60
Epoch 2: val_loss improved from 0.08320 to 0.06324, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_3.h5
Epoch 3/60
Epoch 3: val_loss improved from 0.06324 to 0.06229, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_3.h5
Epoch 4/60
Epoch 4: val_loss improved from 0.06229 to 0.05777, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_3.h5
Epoch 5/60
Epoch 5: val_loss improved from 0.05777 to 0.05617, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_3.h5
Epoch 6/60
Epoch 6: val_loss improved from 0.05617 to 0.055



INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_3/assets


INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_3/assets


Epoch 1/60
Epoch 1: val_loss improved from inf to 0.08557, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_4.h5
Epoch 2/60
Epoch 2: val_loss improved from 0.08557 to 0.07101, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_4.h5
Epoch 3/60
Epoch 3: val_loss improved from 0.07101 to 0.06683, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_4.h5
Epoch 4/60
Epoch 4: val_loss improved from 0.06683 to 0.06364, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_4.h5
Epoch 5/60
Epoch 5: val_loss improved from 0.06364 to 0.06124, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_4.h5
Epoch 6/60
Epoch 6: val_loss improved from 0.06124 to 0.061



INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_4/assets


INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_4/assets


Epoch 1/60
Epoch 1: val_loss improved from inf to 0.08841, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_5.h5
Epoch 2/60
Epoch 2: val_loss improved from 0.08841 to 0.07093, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_5.h5
Epoch 3/60
Epoch 3: val_loss improved from 0.07093 to 0.06922, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_5.h5
Epoch 4/60
Epoch 4: val_loss improved from 0.06922 to 0.06246, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_5.h5
Epoch 5/60
Epoch 5: val_loss improved from 0.06246 to 0.06164, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_5.h5
Epoch 6/60
Epoch 6: val_loss improved from 0.06164 to 0.058



INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_5/assets


INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_5/assets


Epoch 1/60
Epoch 1: val_loss improved from inf to 0.07927, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_6.h5
Epoch 2/60
Epoch 2: val_loss improved from 0.07927 to 0.07575, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_6.h5
Epoch 3/60
Epoch 3: val_loss improved from 0.07575 to 0.06313, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_6.h5
Epoch 4/60
Epoch 4: val_loss did not improve from 0.06313
Epoch 5/60
Epoch 5: val_loss improved from 0.06313 to 0.06013, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_6.h5
Epoch 6/60
Epoch 6: val_loss improved from 0.06013 to 0.05896, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_6.h5
E



INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_6/assets


INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_6/assets


Epoch 1/60
Epoch 1: val_loss improved from inf to 0.08404, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_7.h5
Epoch 2/60
Epoch 2: val_loss improved from 0.08404 to 0.07877, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_7.h5
Epoch 3/60
Epoch 3: val_loss improved from 0.07877 to 0.07445, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_7.h5
Epoch 4/60
Epoch 4: val_loss improved from 0.07445 to 0.06475, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_7.h5
Epoch 5/60
Epoch 5: val_loss improved from 0.06475 to 0.06111, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_7.h5
Epoch 6/60
Epoch 6: val_loss did not improve from 0.06111
E



INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_7/assets


INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_7/assets


Epoch 1/60
Epoch 1: val_loss improved from inf to 0.07877, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_8.h5
Epoch 2/60
Epoch 2: val_loss improved from 0.07877 to 0.06642, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_8.h5
Epoch 3/60
Epoch 3: val_loss improved from 0.06642 to 0.06100, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_8.h5
Epoch 4/60
Epoch 4: val_loss improved from 0.06100 to 0.05732, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_8.h5
Epoch 5/60
Epoch 5: val_loss did not improve from 0.05732
Epoch 6/60
Epoch 6: val_loss improved from 0.05732 to 0.05569, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_8.h5
E



INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_8/assets


INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_8/assets


Epoch 1/60
Epoch 1: val_loss improved from inf to 0.07522, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_9.h5
Epoch 2/60
Epoch 2: val_loss improved from 0.07522 to 0.07102, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_9.h5
Epoch 3/60
Epoch 3: val_loss improved from 0.07102 to 0.05889, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_9.h5
Epoch 4/60
Epoch 4: val_loss did not improve from 0.05889
Epoch 5/60
Epoch 5: val_loss did not improve from 0.05889
Epoch 6/60
Epoch 6: val_loss improved from 0.05889 to 0.05731, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_9.h5
Epoch 7/60
Epoch 7: val_loss did not improve from 0.05731
Epoch 8/60
Epoch 8: val_loss improved from 0.05731 to 0.05541, saving mode



INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_9/assets


INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_9/assets


Epoch 1/60
Epoch 1: val_loss improved from inf to 0.07816, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_10.h5
Epoch 2/60
Epoch 2: val_loss improved from 0.07816 to 0.07382, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_10.h5
Epoch 3/60
Epoch 3: val_loss improved from 0.07382 to 0.06462, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_10.h5
Epoch 4/60
Epoch 4: val_loss improved from 0.06462 to 0.06305, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_10.h5
Epoch 5/60
Epoch 5: val_loss improved from 0.06305 to 0.06034, saving model to /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_10.h5
Epoch 6/60
Epoch 6: val_loss did not improve from 0.06



INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_10/assets


INFO:tensorflow:Assets written to: /Users/Brody1/Dropbox/Northwestern/DNA_Cyclizability/benchmarks/deep-learning/ir_lstm_separateC0s_tiling_10/assets




In [14]:
display_fits(fits)

Average correlation on tiling: 0.9013694587755691 
Average MSE on tiling: 0.04777046223240339 
Average correlation on random: 0.9220625033300693 
Average MSE on random: 0.022342553494513494 
Average correlation on ChrV: 0.7283251672048034 
Average MSE on ChrV: 0.15735308762791916 
Average correlation on CN: 0.8499667871598776 
Average MSE on CN: 0.06589976338471867


In [15]:
nuc_pred = model.predict(X1)
random_pred = model.predict(X3)
tiling_pred = model.predict(X5)
chrv_pred = model.predict(X6)



In [16]:
print(np.corrcoef(nuc_pred.flatten(), Y1)[0,1])
print(np.corrcoef(random_pred.flatten(), Y3)[0,1])
print(np.corrcoef(tiling_pred.flatten(), Y5)[0,1])
print(np.corrcoef(chrv_pred.flatten(), Y6)[0,1])

0.8552795186859308
0.9260616633253925
0.9292724214132013
0.7317199214228032


In [17]:
Y1_new = find_c0new(data_cerevisiae_nucle, aa=[1, 0.39, 1.18]).astype(float)
Y3_new = find_c0new(data_random_library, aa=[1, 0.39, 1.18]).astype(float)
Y6_new = find_c0new(data_chr5, aa=[1, 0.39, 1.18]).astype(float)

In [18]:
data_chr5["C0"][:6], Y6[:6], Y6_new[:6]

(0   -0.069223
 1   -0.688533
 2   -0.464485
 3    0.383790
 4    0.122761
 5   -0.189704
 Name: C0, dtype: float64,
 array([ 0.06143367, -0.76210367, -0.45450441,  0.30210197,  0.06623511,
        -0.21504718]),
 array([ 0.04252972, -0.74574527, -0.46804797,  0.31988126,  0.08478749,
        -0.22350587]))

In [19]:
print(np.corrcoef(nuc_pred.flatten(), Y1_new)[0,1])
print(np.corrcoef(random_pred.flatten(), Y3_new)[0,1])
print(np.corrcoef(tiling_pred.flatten(), Y5)[0,1])
print(np.corrcoef(chrv_pred.flatten(), Y6_new)[0,1])

0.8622618087092941
0.9209422179638825
0.9292724214132013
0.7363685624221729


In [21]:
Y3_other_new = find_c0new(data_random_library, aa=[1, 1.66, 1.40]).astype(float)
print(np.corrcoef(random_pred.flatten(), Y3_other_new)[0,1])

0.9263142339515524
