In [1]:
import tensorflow as tf
from tensorflow import keras
import os
import glob
import h5py
import numpy as np
import pickle as pkl
from tqdm import tqdm
from sklearn import model_selection
import scipy.stats
from sklearn.metrics import mean_squared_error
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

2024-02-06 01:01:21.646134: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## CHIP experiments

In [2]:
# For every trained ChIP CNN, rank its test sequences by the output of the
# second-to-last layer and save the 500 highest-scoring sequences together
# with their indices into the test set.
out_dir = './top_seq/chip/'
os.makedirs(out_dir, exist_ok=True)  # np.savez does not create missing directories

# Sorted so the processing order is reproducible across runs.
exp_list = sorted(glob.glob('../model/chip_model/chip_CNN/*.h5'))
for file in exp_list:
    # Experiment name = model file name without its extension.
    exp = os.path.splitext(os.path.basename(file))[0]

    model = tf.keras.models.load_model(file)

    # Read the test inputs fully into memory ([()]) and close the HDF5 file
    # right away so handles do not accumulate over the loop.
    with h5py.File('../data/chip/'+exp+'_200.h5','r') as data:
        # Axes 1 and 2 are swapped to obtain the layout fed to the model.
        x_test = np.swapaxes(data['x_test'][()],1,2)

    # Sub-model ending at the second-to-last layer
    # (presumably the pre-activation logits -- confirm against the architecture).
    logit_model = tf.keras.models.Model(model.input, model.layers[-2].output)

    y_pred = logit_model.predict(x_test,batch_size=128)
    # argsort is ascending: keep the last 500 indices and reverse them so the
    # strongest prediction comes first.
    # NOTE(review): assumes one score per sequence after squeeze -- confirm.
    test_strength = np.argsort(np.squeeze(y_pred))
    top_k = test_strength[-500:][::-1]
    top_x = x_test[top_k]

    # Swap the axes back to the on-disk layout before saving.
    flip_x = np.swapaxes(top_x,1,2)
    # Positional arrays are stored as arr_0 (sequences) and arr_1 (indices).
    np.savez(out_dir+exp+'.npz',flip_x,top_k)
    # Drop Keras global state so memory does not grow with every loaded model.
    tf.keras.backend.clear_session()

    

2024-02-06 01:01:28.653906: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 78973 MB memory:  -> device: 0, name: NVIDIA A100 80GB PCIe, pci bus id: 0000:47:00.0, compute capability: 8.0
2024-02-06 01:01:29.959920: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8800




2024-02-06 01:01:30.453517: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.




## INSERT

In [3]:
# Rank the RNAenlong insert test sequences by the CNN's prediction and save
# the 500 highest-scoring sequences together with their test-set indices.
model = tf.keras.models.load_model('../model/RNAenlong/CNN.h5')
dataset = '../data/RNAenlong/insert_dataset.h5'

# Read the arrays fully into memory ([()]) and close the HDF5 file afterwards
# instead of leaving the handle open for the rest of the kernel session.
with h5py.File(dataset,'r') as f:
    x_test = f['X_test'][()]
    y_test = f['Y_test'][()]  # only needed by the optional evaluation below

y_pred = model.predict(x_test)
# argsort is ascending: keep the last 500 indices and reverse them so the
# strongest prediction comes first.
# NOTE(review): assumes one score per sequence after squeeze -- confirm.
test_strength = np.argsort(np.squeeze(y_pred))
top_k = test_strength[-500:][::-1]
top_x = x_test[top_k]

# Saved with axes 1 and 2 swapped relative to the layout fed to the model.
flip_x = np.swapaxes(top_x,1,2)
os.makedirs('./top_seq', exist_ok=True)  # np.savez does not create missing directories
# Positional arrays are stored as arr_0 (sequences) and arr_1 (indices).
np.savez('./top_seq/insert.npz',flip_x,top_k)

# Optional sanity check of the model against log-transformed targets:
# y_label = np.log(y_test+1)
# pearsonr = scipy.stats.pearsonr(y_label[:,0], y_pred[:,0])
# mse = mean_squared_error(y_label[:,0], y_pred[:,0])
# print("PR:" + str(pearsonr[0]))
# print("MSE:" + str(mse))






## CLIP

In [4]:
# For every trained eCLIP CNN, rank its test sequences by the output of the
# second-to-last layer and save the 500 highest-scoring sequences together
# with their indices into the test set.
out_dir = './top_seq/clip/'
os.makedirs(out_dir, exist_ok=True)  # np.savez does not create missing directories

# Sorted so the processing order is reproducible across runs.
exp_list = sorted(glob.glob('../model/clip_model/eclip_CNN/*.h5'))
for file in exp_list:
    # Experiment name = model file name without its extension.
    exp = os.path.splitext(os.path.basename(file))[0]

    model = tf.keras.models.load_model(file)

    # Read the test inputs fully into memory ([()]) and close the HDF5 file
    # right away so handles do not accumulate over the loop.
    with h5py.File('../data/eclip/'+exp+'_K562_200.h5','r') as data:
        # Swap axes 1 and 2, then keep only the first 4 entries of the last axis
        # (presumably the one-hot nucleotide channels; any extra channels are
        # dropped -- confirm against the dataset).
        x_test = np.swapaxes(data['X_test'][()],1,2)[:,:,:4]

    # Sub-model ending at the second-to-last layer
    # (presumably the pre-activation logits -- confirm against the architecture).
    logit_model = tf.keras.models.Model(model.input, model.layers[-2].output)

    y_pred = logit_model.predict(x_test,batch_size=128)
    # argsort is ascending: keep the last 500 indices and reverse them so the
    # strongest prediction comes first.
    # NOTE(review): assumes one score per sequence after squeeze -- confirm.
    test_strength = np.argsort(np.squeeze(y_pred))
    top_k = test_strength[-500:][::-1]
    top_x = x_test[top_k]

    # Swap the axes back before saving (the channel slice above is kept).
    flip_x = np.swapaxes(top_x,1,2)
    # Positional arrays are stored as arr_0 (sequences) and arr_1 (indices).
    np.savez(out_dir+exp+'.npz',flip_x,top_k)
    # Drop Keras global state so memory does not grow with every loaded model.
    tf.keras.backend.clear_session()



## LENTI

In [5]:
# For each lentiMPRA cell type, rank the held-out sequences by the ResNet's
# prediction and save the 500 highest-scoring sequences together with their
# indices into the held-out split.
os.makedirs('./top_seq', exist_ok=True)  # np.savez does not create missing directories

for ct in ['HepG2','K562']:
    model = tf.keras.models.load_model('../model/lenti_MPRA/lenti_MPRA_onehot/'+ct+'/ResNet.h5')

    # Read the arrays fully into memory ([()]) and close the HDF5 file right
    # away rather than leaking one open handle per cell type.
    with h5py.File('../data/lenti_MPRA/'+ct+'_onehot.h5','r') as f:
        x = f['onehot'][()]
        y = f['target'][()]
    # Axes 1 and 2 are swapped to obtain the layout fed to the model.
    x = np.swapaxes(x,1,2)

    # Fixed seed makes the 10% held-out split reproducible.
    # NOTE(review): presumably this mirrors the split used at training time -- confirm.
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        x, y, test_size=0.1, random_state=42)

    y_pred = model.predict(x_test,batch_size=128)
    # argsort is ascending: keep the last 500 indices and reverse them so the
    # strongest prediction comes first.
    # NOTE(review): assumes one score per sequence after squeeze -- confirm.
    test_strength = np.argsort(np.squeeze(y_pred))
    top_k = test_strength[-500:][::-1]
    top_x = x_test[top_k]

    # Swap the axes back to the on-disk layout before saving.
    flip_x = np.swapaxes(top_x,1,2)
    # Positional arrays are stored as arr_0 (sequences) and arr_1 (indices).
    np.savez('./top_seq/lenti_'+ct+'.npz',flip_x,top_k)
    # Drop Keras global state between models, as the other model-loading loops
    # in this notebook do.
    tf.keras.backend.clear_session()



