In [1]:
from __future__ import print_function
import keras
from keras.models import Sequential, Model, load_model

import tensorflow as tf

import pandas as pd

import os
import pickle
import numpy as np

import scipy.sparse as sp
import scipy.io as spio

import isolearn.io as isoio
import isolearn.keras as iso


Using TensorFlow backend.


In [2]:
#Load sequence data
#The file carries a .csv extension but is parsed as tab-separated.

df = pd.read_csv(
    '../../data/prepared_data/apa_gtex_data/polyadb_merged_lead_SNPs.csv',
    sep='\t'
)


In [11]:
#Create data features

encoder = iso.OneHotEncoder(205)

#Constant second model input: 13 columns per row, with only column 11 set to 1
#(presumably a placeholder library indicator -- confirm against the model definition)
l_fake = np.zeros((len(df), 13))
l_fake[:, 11] = 1.

#Every sequence is cropped to the same 205-character window before encoding
window_start = 175 - 70
window_end = window_start + 205

def _encode_column(column_name):
    """One-hot encode the cropped sequences of one dataframe column.

    Each encoded sequence gets two leading singleton axes and the results
    are joined along axis 0, giving one entry per dataframe row.
    """
    encoded = []
    for sequence in df[column_name]:
        encoded.append(encoder.encode(sequence[window_start:window_end])[None, None, :, :])
    return np.concatenate(encoded, axis=0)

ref_onehots = _encode_column('wide_seq_ext')
var_onehots = _encode_column('wide_seq_ext_var')

#Pad
#Number of all-zero rows needed to bring the row count up to a multiple of 32
#(the batch size used at prediction time); 0 when it already is a multiple
n_pad = (-len(df)) % 32

l_fake = np.concatenate([l_fake, np.zeros((n_pad, 13))], axis=0)

onehot_padding_shape = (n_pad, 1, 205, 4)
ref_onehots = np.concatenate([ref_onehots, np.zeros(onehot_padding_shape)], axis=0)
var_onehots = np.concatenate([var_onehots, np.zeros(onehot_padding_shape)], axis=0)


In [12]:
#Load APARENT Resnet

#Checkpoint name without extension; also used later to name the prediction dump
model_name = 'aparent_all_libs_resnet_no_clinvar_wt_ep_5'

#The checkpoint directory is resolved relative to the current working directory
save_dir = os.path.join(os.getcwd(), '../../../aparent-resnet/saved_models')
model_file = model_name + '.h5'
model_path = os.path.join(save_dir, model_file)

aparent_model = load_model(model_path)



In [17]:
#Predict

#The model returns two outputs; only the second one (cut predictions) is used
_, ref_cut_pred = aparent_model.predict(x=[ref_onehots, l_fake], batch_size=32, verbose=True)
_, var_cut_pred = aparent_model.predict(x=[var_onehots, l_fake], batch_size=32, verbose=True)

#Drop the predictions that belong to the zero-padded rows
#(guarded because a slice ending at -0 would discard every row)
if n_pad > 0 :
    ref_cut_pred = ref_cut_pred[:-n_pad, :]
    var_cut_pred = var_cut_pred[:-n_pad, :]

#Calculate isoform logits
isoform_start = 0
isoform_end = 205

#Isoform probability = summed cut prediction over columns [isoform_start, isoform_end)
ref_iso_pred = np.sum(ref_cut_pred[:, isoform_start:isoform_end], axis=1)
var_iso_pred = np.sum(var_cut_pred[:, isoform_start:isoform_end], axis=1)

def _safe_logit(prob):
    """Return log(p / (1 - p)) with p clipped away from 0 and 1.

    A summed probability can reach exactly 0 or 1 in floating point, which
    would otherwise produce a division by zero and inf/NaN log-odds. The
    clipping bound is the machine epsilon of the input dtype, so values that
    are not at the boundary are left untouched and the dtype is preserved.
    """
    eps = np.finfo(prob.dtype).eps
    upper = prob.dtype.type(1) - eps
    clipped = np.clip(prob, eps, upper)
    return np.log(clipped / (1. - clipped))

#Variant effect: difference in isoform log-odds (variant minus reference)
delta_logodds = _safe_logit(var_iso_pred) - _safe_logit(ref_iso_pred)




In [18]:
#Copy the dataframe and store isoform predictions
#The copy gets a fresh 0..n-1 index and one extra column holding the log-odds deltas

pred_df = df.copy().reset_index(drop=True).assign(delta_isoform_logodds=delta_logodds)


In [19]:
#Dump prediction dataframe
#Only pred_df is written; the output name is derived from the model name

output_path = 'apa_gtex_data/' + model_name + '_predictions'
isoio.dump({'pred_df' : pred_df}, output_path)