In [1]:
import os
# Expose only GPU index 2 to this process; set here so it is in place before
# TensorFlow is imported in the next cell.
os.environ.update({'CUDA_VISIBLE_DEVICES': '2'})

In [3]:
import h5py
import sys
import numpy as np
import tensorflow as tf
import mpra_model
from sklearn import model_selection
import scipy.stats
# Cell line whose lenti-MPRA dataset is used; also reused later when naming
# the saved model checkpoints.
celltype = 'HepG2'
# Path of the HDF5 file holding the one-hot inputs and targets for this cell line.
file = '../data/lenti_MPRA/%s_data.h5' % celltype

2024-05-31 11:35:04.723661: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Load the train / validation / test splits from the HDF5 file.
# Each `[:]` reads the whole dataset into an in-memory NumPy array, so the
# arrays stay valid after the file is closed. The context manager guarantees
# the handle is released even if one of the reads raises.
with h5py.File(file, 'r') as f:
    x_train = f['onehot_train'][:]
    x_valid = f['onehot_valid'][:]
    x_test = f['onehot_test'][:]
    y_train = f['y_train'][:]
    y_valid = f['y_valid'][:]
    y_test = f['y_test'][:]

In [4]:
# Hyperparameters handed to the mpra_model constructors through `config=`.
# Key order is kept the same as before; what each key controls is defined
# by mpra_model, not in this notebook.
cnn_config = dict(
    activation='exponential',
    reduce_dim=196,
    conv1_filter=196,
    conv1_kernel=7,
    dropout1=0.2,
    res_pool=5,
    res_dropout=0.2,
    conv2_filter=256,
    conv2_kernel=7,
    pool2_size=4,
    dropout2=0.2,
    dense=512,
    dense2=256,
)

In [5]:
# Build a LegNet instance from mpra_model with the shared config.
# (230, 4) is the input shape passed to the constructor -- presumably
# (sequence length, one-hot channels); confirm against mpra_model.
# NOTE(review): the training loop in the next cell rebinds both `model_func`
# and `model`, so this instance is not the one that gets trained there.
model_func = mpra_model.LegNet
model = model_func((230, 4), config=cnn_config)

2024-05-31 11:35:09.113140: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 79078 MB memory:  -> device: 0, name: NVIDIA A100 80GB PCIe, pci bus id: 0000:85:00.0, compute capability: 8.0


In [6]:
# Train each listed architecture on the same split, checkpoint the best
# validation epoch to disk, and print the test-set Pearson correlation.
for model_name in ['ResNet','rep_cnn']:
    # One checkpoint file per (architecture, cell type) pair.
    save_model  = '../model/lenti_MPRA/%s_%s.h5'%(model_name,celltype)

    # Look the constructor up by name in mpra_model. The architecture name is
    # kept in `model_name` so it is no longer overwritten by the model object.
    model_func = getattr(mpra_model,model_name)
    # (230, 4) is the input shape passed to the constructor -- presumably
    # (sequence length, one-hot channels); confirm against mpra_model.
    model = model_func((230,4),config = cnn_config)

    # Stop after 10 epochs without val_loss improvement and roll the weights
    # back to the best epoch. `monitor` is spelled out (it is the default) so
    # all three callbacks visibly track the same quantity.
    earlyStopping_callback = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=10, restore_best_weights=True
        )
    # Multiply the learning rate by 0.2 after 5 stagnant epochs, floor at 1e-6.
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss', factor=0.2,
            patience=5, min_lr=1e-6)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    # Write the model to `save_model` only when val_loss reaches a new minimum.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(save_model,
                                    monitor='val_loss',
                                    save_best_only=True,
                                    mode = 'min',
                                    save_freq='epoch',)
    model.compile(
                loss="mean_squared_error",
                metrics=["mse", "mae"],
                optimizer=optimizer,
            )

    result = model.fit(x_train,y_train,
        batch_size=128,
        validation_data=(x_valid,y_valid),
        epochs=100,
        shuffle=True,
        verbose=2,
        callbacks=[earlyStopping_callback,checkpoint,reduce_lr],
    )

    # Evaluate on the held-out test split. Both arrays are squeezed so that
    # pearsonr receives matching shapes; the second call previously used a
    # bare `squeeze`, which is undefined and raised NameError after training.
    y_pred = model.predict(x_test)
    pr = scipy.stats.pearsonr(np.squeeze(y_pred),np.squeeze(y_test))[0]

    print(pr)
    # Drop the reference to the trained model before building the next one.
    del model

2024-05-22 12:58:01.080562: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 79078 MB memory:  -> device: 0, name: NVIDIA A100 80GB PCIe, pci bus id: 0000:85:00.0, compute capability: 8.0


Epoch 1/100


2024-05-22 12:58:05.358415: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8800
2024-05-22 12:58:05.631255: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2024-05-22 12:58:05.648565: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x561602c87160 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-05-22 12:58:05.648604: I tensorflow/compiler/xla/service/service.cc:177]   StreamExecutor device (0): NVIDIA A100 80GB PCIe, Compute Capability 8.0
2024-05-22 12:58:05.651822: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-05-22 12:58:05.761630: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the pro

1537/1537 - 29s - loss: 0.4584 - mse: 0.4584 - mae: 0.5197 - val_loss: 0.6925 - val_mse: 0.6925 - val_mae: 0.6717 - lr: 0.0010 - 29s/epoch - 19ms/step
Epoch 2/100
1537/1537 - 23s - loss: 0.3750 - mse: 0.3750 - mae: 0.4652 - val_loss: 0.3369 - val_mse: 0.3369 - val_mae: 0.4380 - lr: 0.0010 - 23s/epoch - 15ms/step
Epoch 3/100
1537/1537 - 23s - loss: 0.3483 - mse: 0.3483 - mae: 0.4482 - val_loss: 0.3209 - val_mse: 0.3209 - val_mae: 0.4286 - lr: 0.0010 - 23s/epoch - 15ms/step
Epoch 4/100
1537/1537 - 23s - loss: 0.3300 - mse: 0.3300 - mae: 0.4366 - val_loss: 0.3382 - val_mse: 0.3382 - val_mae: 0.4280 - lr: 0.0010 - 23s/epoch - 15ms/step
Epoch 5/100
1537/1537 - 23s - loss: 0.3166 - mse: 0.3166 - mae: 0.4272 - val_loss: 0.3476 - val_mse: 0.3476 - val_mae: 0.4320 - lr: 0.0010 - 23s/epoch - 15ms/step
Epoch 6/100
1537/1537 - 23s - loss: 0.3035 - mse: 0.3035 - mae: 0.4193 - val_loss: 0.3118 - val_mse: 0.3118 - val_mae: 0.4167 - lr: 0.0010 - 23s/epoch - 15ms/step
Epoch 7/100
1537/1537 - 23s - loss

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [9]:
# Pearson correlation between the squeezed test predictions and targets.
# Only the correlation coefficient (element 0 of the result) is kept.
pr = scipy.stats.pearsonr(
    np.squeeze(y_pred),
    np.squeeze(y_test),
)[0]

In [10]:
# Display the Pearson correlation coefficient currently stored in `pr`.
pr

0.7444173017011313