## One hot models
Includes the one-hot CNN and one-hot residual (ResNet) models.

In [12]:
# Select the GPU *before* TensorFlow is imported. CUDA reads
# CUDA_VISIBLE_DEVICES when it initialises, so exporting it first guarantees
# the mask is honoured no matter when TensorFlow first touches the GPU.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import h5py
import numpy as np
import pandas as pd
import scipy.stats
import tensorflow as tf

# Cell types evaluated in the cells below; each name is also a directory
# component of the model and data paths built there.
celltype_list = ['K562','HepG2']

In [12]:
# Evaluate the one-hot CNN and ResNet models of every cell type on the CAGI
# variants: the predicted effect of a variant is model(alt) - model(ref), and
# it is scored per experiment (metadata column '8') by Pearson correlation
# against the measured value (metadata column '6').
tf.get_logger().setLevel('ERROR')  # only show TensorFlow logger messages at ERROR or above
for celltype in celltype_list:
    cnn_dir = '../model/lenti_MPRA/lenti_MPRA_onehot/'+celltype+'/base_CNN_model.h5'
    rb_dir = '../model/lenti_MPRA/lenti_MPRA_onehot/'+celltype+'/ResNet.h5'
    print('###########' + celltype + '########')
    exp_df = pd.read_csv('../data/CAGI/'+celltype+'/metadata.csv')
    target = exp_df['6'].to_numpy()

    # Row positions of every experiment, computed once per cell type instead of
    # once per model. Positions come from a boolean mask rather than from the
    # DataFrame index labels, so they stay valid for any index.
    # NOTE(review): assumes metadata rows are in the same order as the rows of
    # the 'alt'/'ref' arrays -- confirm against the data preparation step.
    exp_rows = {
        exp: np.flatnonzero((exp_df['8'] == exp).to_numpy())
        for exp in exp_df['8'].unique()
    }

    # The context manager closes the HDF5 handle once both models are evaluated;
    # previously one handle per cell type was left open.
    with h5py.File("../data/CAGI/"+celltype+"/onehot.h5", "r") as file:
        alt = file['alt']
        ref = file['ref']

        for model_dir in [cnn_dir,rb_dir]:
            print(model_dir.split('/')[-1])
            model = tf.keras.models.load_model(model_dir)
            alt_pred = model.predict(alt,verbose=0)
            ref_pred = model.predict(ref,verbose = 0)
            pred = alt_pred - ref_pred
            pred_flat = np.squeeze(pred)  # drop singleton axes so rows can be indexed directly

            perf = {}
            for exp, rows in exp_rows.items():
                perf[exp] = scipy.stats.pearsonr(pred_flat[rows],target[rows])[0]
            print(perf)
            print(np.mean(list(perf.values())))


###########K562########
base_CNN_model.h5


2024-01-17 16:30:10.468787: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2024-01-17 16:30:10.574442: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8800


{'PKLR': 0.42567543950445913}
0.42567543950445913
ResNet.h5
{'PKLR': 0.5509142235337524}
0.5509142235337524
###########HepG2########
base_CNN_model.h5




{'LDLR': 0.3140119684297847, 'SORT1': 0.26455394176081337, 'F9': 0.3597829131304178}
0.3127829411070053
ResNet.h5
{'LDLR': 0.5142895208051962, 'SORT1': 0.4411468053560604, 'F9': 0.5025298928547497}
0.4859887396720021


## Embedding models
Models trained on GPN embeddings.

In [1]:
# Select the GPU *before* TensorFlow is imported. CUDA reads
# CUDA_VISIBLE_DEVICES when it initialises, so exporting it first guarantees
# the mask is honoured no matter when TensorFlow first touches the GPU.
# NOTE(review): the one-hot section earlier in this notebook selects device
# '0'. If both sections run in one kernel, this later assignment presumably
# has no effect once TensorFlow has already initialised the GPU -- confirm
# which device is intended, or run this section in a fresh kernel.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

import h5py
import numpy as np
import pandas as pd
import scipy.stats
import tensorflow as tf

# Cell types evaluated in the cell below; each name is also a directory
# component of the model path built there.
celltype_list = ['K562','HepG2']

2024-03-10 23:20:10.107654: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Evaluate the embedding-based model of every cell type on the CAGI variants:
# the predicted effect of a variant is model(alt) - model(ref), and it is
# scored per experiment (metadata column '8') by Pearson correlation against
# the measured value (metadata column '6').
# NOTE(review): the section heading says GPN embeddings, but the input file is
# named NT.h5 -- confirm which embedding this file actually holds.
data = '../data/CAGI/230_embed/NT.h5'

#Read CAGI metadata (same file for every cell type, so it is read once)
exp_df = pd.read_csv('../data/CAGI/230/final_cagi_metadata.csv')
target = exp_df['6'].to_numpy()

# The embedding file is shared by all cell types: open it once and let the
# context manager close it, instead of re-opening it per cell type and never
# closing the handles.
with h5py.File(data,'r') as file:
    for celltype in celltype_list:
        print('######'+celltype+'######')
        #Load model
        model_dir = '../model/lenti_MPRA/lenti_MPRA_embed/'+celltype+'/layer_32.h5'
        model = tf.keras.models.load_model(model_dir)

        #zero-shot predictions
        alt_pred = model.predict(file['alt'],verbose = 0)
        ref_pred = model.predict(file['ref'],verbose=0)
        pred = alt_pred-ref_pred
        pred_flat = np.squeeze(pred)  # drop singleton axes so rows can be indexed directly

        #evaluations: only the experiments listed for this cell type are scored
        if celltype == 'K562':
            exp_list = ['PKLR']
        else:
            exp_list = ['LDLR','SORT1','F9']
        perf = {}
        for exp in exp_list:
            # Positions come from a boolean mask rather than from the DataFrame
            # index labels, so they stay valid for any index.
            # NOTE(review): assumes metadata rows are in the same order as the
            # rows of the 'alt'/'ref' arrays -- confirm.
            rows = np.flatnonzero((exp_df['8'] == exp).to_numpy())
            perf[exp] = scipy.stats.pearsonr(pred_flat[rows],target[rows])[0]
        print(perf)
        print(np.mean(list(perf.values())))

######K562######


2024-03-10 23:20:22.629644: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 77581 MB memory:  -> device: 0, name: NVIDIA A100 80GB PCIe, pci bus id: 0000:c7:00.0, compute capability: 8.0




2024-03-10 23:20:24.396143: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2024-03-10 23:20:24.505587: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8800


{'PKLR': 0.24031916713230142}
0.24031916713230142
######HepG2######




{'LDLR': 0.1789985393569604, 'SORT1': 0.1589224895682642, 'F9': 0.07374214779153519}
0.13722105890558658
