In [1]:
from __future__ import print_function
import keras
from keras.models import Sequential, Model, load_model

import tensorflow as tf

import pandas as pd

import os
import pickle
import numpy as np

import scipy.sparse as sp
import scipy.io as spio

import isolearn.io as isoio
import isolearn.keras as iso

from aparent.data.aparent_data_native_pairs import load_data


Using TensorFlow backend.


In [2]:
#Build the data generators for the native pair-wise APA dataset

# Directory holding the prepared pair data, relative to the working directory.
file_path = '../../data/prepared_data/apa_leslie_apadb_pair_data/'

# A batch size of 1 is requested; the returned object is indexed by split
# name further down (e.g. native_gens['all']).
native_gens = load_data(file_path=file_path, batch_size=1)


Pair-wise Native APA (APADB + Leslie) size = 29756
Training set size = 0
Validation set size = 0
Test set size = 29756


In [3]:
#Restore the APADB-tuned APARENT model from its saved .h5 file

model_name = 'aparent_apadb_fitted'

# The saved-model directory is resolved relative to the current working directory.
save_dir = os.path.join(os.getcwd(), '../../aparent/saved_models')

# The checkpoint file is named after the model, with the .h5 extension appended.
model_file = model_name + '.h5'
model_path = os.path.join(save_dir, model_file)

apadb_model = load_model(model_path)


In [4]:
#Predict from test data generator

# The model yields three outputs, unpacked here as the isoform prediction and
# the proximal / distal cut outputs (naming follows the variables below).
iso_pred, cut_prox, cut_dist = apadb_model.predict_generator(native_gens['all'], workers=4, use_multiprocessing=True)

#Calculate isoform logits
iso_pred = np.ravel(iso_pred)

# A prediction of exactly 0 or 1 would turn the log-odds into -inf / +inf and
# raise a divide-by-zero RuntimeWarning, so the values are clipped into the
# open interval (0, 1) by one machine epsilon of their own dtype before the
# logit is taken. iso_pred itself is kept unclipped.
# NOTE(review): assumes iso_pred has a floating-point dtype -- confirm.
prob_eps = np.finfo(iso_pred.dtype).eps
iso_clipped = np.clip(iso_pred, prob_eps, 1. - prob_eps)
logodds_pred = np.log(iso_clipped / (1. - iso_clipped))


In [5]:
#Build the prediction dataframe from the generator's source dataframe

# reset_index() returns a new frame, so the dataframe held by the generator
# is not modified by anything below.
source_df = native_gens['all'].sources['df'].reset_index()

# Keep only the gene identifier and attach the isoform predictions and their
# log-odds as new columns (assign() also returns a new frame).
native_df = source_df[['gene_id']].assign(
    iso_pred=iso_pred,
    logodds_pred=logodds_pred
)


In [6]:
#Write the prediction dataframe and the cut probability matrices to disk

# The two cut matrices are converted to scipy CSR sparse matrices before dumping.
prediction_payload = {
    'native_df' : native_df,
    'cut_prox' : sp.csr_matrix(cut_prox),
    'cut_dist' : sp.csr_matrix(cut_dist)
}

# The output location is relative to the working directory and is named after the model.
output_prefix = 'apa_leslie_apadb_pair_data/' + model_name + '_predictions'

isoio.dump(prediction_payload, output_prefix)