In [1]:
from __future__ import print_function
import keras
from keras.models import Sequential, Model, load_model

import tensorflow as tf

import pandas as pd

import os
import pickle
import numpy as np

import scipy.sparse as sp
import scipy.io as spio

import isolearn.io as isoio
import isolearn.keras as iso


Using TensorFlow backend.


In [2]:
#Load sequence data
#Note: the file carries a .csv extension but is parsed as tab-separated.

df = pd.read_csv(
    '../../data/prepared_data/apa_gtex_data/polyadb_merged_lead_SNPs.csv',
    sep='\t'
)


In [3]:
#Create data features

#Window cut out of every wide sequence: 205 positions starting at offset 175-70
window_start = 175 - 70
window_end = window_start + 205

encoder = iso.OneHotEncoder(205)

def _onehot_column(seqs) :
    """One-hot encode the fixed window of each sequence in `seqs`.

    Each encoding gets two leading singleton axes and the results are joined
    along axis 0, giving an array of shape (len(seqs), 1, 205, 4).
    """
    encoded = [
        encoder.encode(seq[window_start:window_end])[None, None, :, :]
        for seq in seqs
    ]
    return np.concatenate(encoded, axis=0)

ref_onehots = _onehot_column(df['wide_seq_ext'])
var_onehots = _onehot_column(df['wide_seq_ext_var'])

#Constant second model input: 13 columns with only column 11 switched on
#(presumably a one-hot library/context indicator -- TODO confirm against the model definition)
l_fake = np.zeros((len(df), 13))
l_fake[:, 11] = 1.

#Pad with all-zero rows so the number of examples is a multiple of 32
n_pad = (-len(df)) % 32

l_fake = np.concatenate([l_fake, np.zeros((n_pad, 13))], axis=0)
ref_onehots = np.concatenate([ref_onehots, np.zeros((n_pad, 1, 205, 4))], axis=0)
var_onehots = np.concatenate([var_onehots, np.zeros((n_pad, 1, 205, 4))], axis=0)


In [4]:
#Load APARENT Resnet
#The saved-models directory is resolved relative to the current working directory.

save_dir = os.path.join(os.getcwd(), '../../../aparent-resnet/saved_models')

model_name = 'aparent_all_libs_resnet_no_clinvar_wt_ep_5'
model_path = os.path.join(save_dir, model_name + '.h5')

aparent_model = load_model(model_path)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.




In [5]:
#Predict
#The model returns two outputs; only the second one (the per-position cut prediction) is used.
#Indexing with [1] instead of unpacking into `_` avoids overwriting IPython's last-output variable.

ref_cut_pred = aparent_model.predict(x=[ref_onehots, l_fake], batch_size=32, verbose=True)[1]
var_cut_pred = aparent_model.predict(x=[var_onehots, l_fake], batch_size=32, verbose=True)[1]

#Drop the predictions made for the all-zero padding rows
if n_pad > 0 :
    ref_cut_pred = ref_cut_pred[:-n_pad, :]
    var_cut_pred = var_cut_pred[:-n_pad, :]

#Calculate isoform logits
#The isoform proportion is the cut prediction summed over positions [isoform_start, isoform_end)
isoform_start = 0
isoform_end = 205

ref_iso_pred = np.sum(ref_cut_pred[:, isoform_start:isoform_end], axis=1)
var_iso_pred = np.sum(var_cut_pred[:, isoform_start:isoform_end], axis=1)

def _safe_logit(p) :
    """Return log(p / (1 - p)) with p clipped to [eps, 1 - eps].

    eps is the machine epsilon of p's floating-point dtype. Without clipping,
    a summed proportion that saturates to exactly 0 or 1 produces +/-inf
    (and NaN in the subsequent difference) plus a RuntimeWarning.
    """
    eps = np.finfo(p.dtype).eps
    p = np.clip(p, eps, 1. - eps)
    return np.log(p / (1. - p))

#Variant effect: log-odds of the variant minus log-odds of the reference
delta_logodds = _safe_logit(var_iso_pred) - _safe_logit(ref_iso_pred)




In [15]:
#Load Tissue scaler and predict variant effects
import keras.backend as K

#Note: this reassigns model_name, so later cells see the tissue-ensemble name
model_name = "human_convnet_16_16_no_dense_linear_leslie_hek293_brain_all_cuts_retry_ensemble"

n_bootstraps = 10

save_dir = '../../../aparent-resnet/saved_models'

def _predict_tissue_scores(model, onehots) :
    """Run one tissue model on `onehots` and return its scores without padding rows.

    The one-hot input is repeated 10 times along a new axis 1 and only slot 0
    of the output's axis 1 is kept (presumably the model expects 10 sites per
    example -- TODO confirm against the model definition). Rows that belong to
    the n_pad all-zero padding examples are dropped from the end.
    """
    tiled = np.tile(onehots[:, None, ...], (1, 10, 1, 1, 1))
    scores = model.predict(x=[tiled], batch_size=32, verbose=True)[:, 0, :]
    if n_pad > 0 :
        scores = scores[:-n_pad, :]
    return scores

ref_tissue_scores = []
var_tissue_scores = []

for bootstrap_ix in range(n_bootstraps) :
    
    #Clear keras session so graphs from previously loaded models do not accumulate
    K.clear_session()
    
    print("Predicting using model " + str(bootstrap_ix) + "...")
    
    #Load model(s)
    #(No directory is created here: a missing save_dir should surface as a load error,
    # not be masked by silently creating an empty folder.)
    model_path = os.path.join(save_dir, model_name + '_' + str(bootstrap_ix) + '_pas_model' + '.h5')
    tissue_model = load_model(model_path)
    
    #Predict
    ref_tissue_scores.append(_predict_tissue_scores(tissue_model, ref_onehots))
    var_tissue_scores.append(_predict_tissue_scores(tissue_model, var_onehots))

#Average the scores over the bootstrap ensemble
ref_tissue_score = np.mean(np.stack(ref_tissue_scores, axis=0), axis=0)
var_tissue_score = np.mean(np.stack(var_tissue_scores, axis=0), axis=0)
    

Predicting using model 0...
Predicting using model 1...
Predicting using model 2...
Predicting using model 3...
Predicting using model 4...
Predicting using model 5...
Predicting using model 6...
Predicting using model 7...
Predicting using model 8...
Predicting using model 9...


In [16]:

#Difference between the second and first tissue score columns (column 1 minus column 0)
ref_delta_tissue_score = np.subtract(ref_tissue_score[:, 1], ref_tissue_score[:, 0])
var_delta_tissue_score = np.subtract(var_tissue_score[:, 1], var_tissue_score[:, 0])


In [17]:
#Copy the dataframe and store isoform predictions

pred_df = df.copy().reset_index(drop=True)

#Columns are added in this order; each array holds one value per row of df
prediction_columns = [
    ('delta_isoform_logodds', delta_logodds),
    ('ref_delta_tissue_score', ref_delta_tissue_score),
    ('var_delta_tissue_score', var_delta_tissue_score),
    ('ref_tissue_score_1', ref_tissue_score[:, 0]),
    ('ref_tissue_score_2', ref_tissue_score[:, 1]),
    ('var_tissue_score_1', var_tissue_score[:, 0]),
    ('var_tissue_score_2', var_tissue_score[:, 1]),
]

for column_name, column_values in prediction_columns :
    pred_df[column_name] = column_values


In [18]:
#Dump prediction dataframe (only pred_df is written; no cut probability matrix is included)
#The output name uses whatever model_name currently holds; when the cells are run in order
#that is the tissue-ensemble name assigned in the tissue scaler cell.

output_prefix = 'apa_gtex_data/' + model_name + '_predictions'
isoio.dump({'pred_df' : pred_df}, output_prefix)