## One hot models
This section evaluates the one-hot CNN and one-hot residual (ResNet) models.

In [4]:
import tensorflow as tf
import h5py 
import scipy.stats
import numpy as np
import os
import pandas as pd
import glob
# Run configuration for the one-hot evaluation below.
# Only GPU 0 is made visible to CUDA for this process.
os.environ.update(CUDA_VISIBLE_DEVICES="0")
# Cell types whose CAGI data are evaluated, and the name prefixes used to
# locate the saved models on disk.
celltype_list = ["K562", "HepG2"]
model_list = ["ResNet", "archiv"]
# Alternative model list kept for reference:
# model_list = ['rep_cnn','ResNet','MPRAnn']

In [5]:
# NOTE(review): leftover inspection cell. As far as this notebook shows,
# `model_dir` is only assigned inside the evaluation loop of the following
# cell, so this cell relies on earlier kernel state and would fail on a
# fresh "Restart & Run All" — confirm and consider removing it.
model_dir

'../model/lenti_MPRA/ResNet_HepG2.h5'

In [6]:
# Zero-shot evaluation of the one-hot models on the CAGI variants.
# For every cell type and model: predict the alt and ref sequences, take the
# difference (alt - ref) as the predicted variant effect, and report the
# Pearson correlation with the target column for each experiment.
tf.get_logger().setLevel('ERROR')  # hide TensorFlow log messages below ERROR
for celltype in celltype_list:
    exp_df = pd.read_csv('../data/CAGI/' + celltype + '/metadata.csv')
    # Column '6' holds the values the predictions are correlated against and
    # column '8' the experiment label used to group rows.
    target = exp_df['6'].to_numpy()
    experiment = exp_df['8']

    # The context manager guarantees the HDF5 file is closed even if model
    # loading or prediction raises.
    with h5py.File('../data/CAGI/' + celltype + '/onehot.h5', 'r') as h5_file:
        alt = h5_file['alt']
        ref = h5_file['ref']
        # `model_name` is kept separate from `model` so the name string is not
        # overwritten by the loaded Keras object.
        for model_name in model_list:
            pattern = '../model/lenti_MPRA/%s*%s*' % (model_name, celltype)
            # glob returns matches in arbitrary order; sort so the choice is
            # deterministic when more than one file matches.
            matches = sorted(glob.glob(pattern))
            if not matches:
                raise FileNotFoundError('No saved model matches pattern: ' + pattern)
            model_dir = matches[0]
            print(os.path.basename(model_dir))

            model = tf.keras.models.load_model(model_dir)
            alt_pred = model.predict(alt, verbose=0)
            ref_pred = model.predict(ref, verbose=0)
            pred = alt_pred - ref_pred
            # Drop singleton axes once instead of once per experiment.
            pred_flat = np.squeeze(pred)

            perf = {}
            for exp in experiment.unique():
                # A boolean row mask selects the same rows as the metadata
                # filter without assuming the DataFrame index is positional.
                mask = (experiment == exp).to_numpy()
                perf[exp] = scipy.stats.pearsonr(pred_flat[mask], target[mask])[0]
            print(perf)
            print(np.mean(list(perf.values())))
    

ResNet_K562.h5




{'PKLR': 0.6009966832410513}
0.6009966832410513
archiv_K562.h5




{'PKLR': 0.6121411310204448}
0.6121411310204448
ResNet_HepG2.h5




{'LDLR': 0.44802903058086224, 'SORT1': 0.49767967879343145, 'F9': 0.5081893348033798}
0.4846326813925579
archiv_HepG2.h5




{'LDLR': 0.6111832917528024, 'SORT1': 0.43722388204382634, 'F9': 0.5951978150932921}
0.5478683296299737


## Embedding models
Models trained on pretrained embeddings (`nt`, `gpn`, `sei`).

In [1]:
import tensorflow as tf
import h5py 
import scipy.stats
import numpy as np
import os
import pandas as pd
import glob
# Run configuration for the embedding-model evaluation below.
# Only GPU 3 is made visible to CUDA for this process.
os.environ.update(CUDA_VISIBLE_DEVICES="3")
# Cell types to evaluate, and the embedding names used both for the
# embedding files and as prefixes of the saved model files.
celltype_list = ["K562", "HepG2"]
model_list = ["nt", "gpn", "sei"]

2024-06-04 13:25:44.764333: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
# Zero-shot evaluation of the embedding-based models on the CAGI variants.
# For every embedding type and cell type: predict alt and ref inputs, take the
# difference (alt - ref), and report the per-experiment Pearson correlation
# with the target column.

# Read CAGI metadata once: it is the same file for every model and cell type.
exp_df = pd.read_csv('../data/CAGI/230/final_cagi_metadata.csv')
# Column '6' holds the values the predictions are correlated against and
# column '8' the experiment label used to group rows.
target = exp_df['6'].to_numpy()
experiment = exp_df['8']

for model_n in model_list:
    print(model_n)
    data = '../data/CAGI/230_embed/%s.h5' % model_n
    # The embedding file depends only on the model name, so open it once per
    # model; the context manager closes it even if an error occurs.
    with h5py.File(data, 'r') as embed_file:
        for celltype in celltype_list:
            print(celltype)
            pattern = '../model/lenti_MPRA/%s*%s*' % (model_n, celltype)
            matches = glob.glob(pattern)
            if len(matches) != 1:
                # Zero or several candidates: retry with the narrower
                # '<model_n>1.0' prefix.
                # NOTE(review): the meaning of the '1.0' tag is not visible
                # in this notebook — confirm against the training code.
                pattern = '../model/lenti_MPRA/%s1.0*%s*' % (model_n, celltype)
                # Sorted so the pick is deterministic if several files match.
                matches = sorted(glob.glob(pattern))
                if not matches:
                    raise FileNotFoundError('No saved model matches pattern: ' + pattern)
            model_dir = matches[0]

            model = tf.keras.models.load_model(model_dir)

            # Zero-shot predictions
            alt_pred = model.predict(embed_file['alt'], verbose=0)
            ref_pred = model.predict(embed_file['ref'], verbose=0)
            pred = alt_pred - ref_pred
            # Drop singleton axes once instead of once per experiment.
            pred_flat = np.squeeze(pred)

            # Evaluation: experiments scored for this cell type.
            if celltype == 'K562':
                exp_list = ['PKLR']
            else:
                exp_list = ['LDLR', 'SORT1', 'F9']
            perf = {}
            for exp in exp_list:
                # A boolean row mask selects the same rows as the metadata
                # filter without assuming the DataFrame index is positional.
                mask = (experiment == exp).to_numpy()
                perf[exp] = scipy.stats.pearsonr(pred_flat[mask], target[mask])[0]
            print(perf)
            print(np.mean(list(perf.values())))

nt
K562




{'PKLR': 0.19751416149787118}
0.19751416149787118
HepG2




{'LDLR': 0.23770915408413978, 'SORT1': 0.1593171847183479, 'F9': 0.15761628410076345}
0.18488087430108371
gpn
K562




{'PKLR': 0.4374305979249826}
0.4374305979249826
HepG2




{'LDLR': 0.2731957664281832, 'SORT1': 0.28335733297106114, 'F9': 0.4384242616828583}
0.3316591203607009
sei
K562




{'PKLR': 0.7006008564153172}
0.7006008564153172
HepG2




{'LDLR': 0.602846117631191, 'SORT1': 0.5720822845392619, 'F9': 0.5619209740695873}
0.5789497920800134


In [4]:
# NOTE(review): leftover inspection cell. It displays whatever `model`
# currently holds in the kernel (a loaded Keras model in the saved output),
# so it depends on a previous run of the evaluation loop — consider removing.
model

<keras.engine.functional.Functional at 0x7f566bd3d850>

In [6]:
# NOTE(review): leftover debugging cell. After the evaluation loop, `model` is
# the loaded Keras model object rather than a model-name string, so its repr is
# formatted into the pattern and nothing matches (hence the empty list below).
# To inspect the candidate files, the name variable `model_n` is presumably
# what was intended — confirm.
glob.glob('../model/lenti_MPRA/%s*%s*'%(model,celltype))

[]